From 32a0d24c419b0b06a5f5e5addb3246dd10ea1de8 Mon Sep 17 00:00:00 2001 From: Luke Roberts Date: Mon, 26 Aug 2024 13:48:48 -0600 Subject: [PATCH 01/37] done --- src/interface/metadata.cpp | 7 +++++++ src/interface/metadata.hpp | 2 ++ src/interface/state_descriptor.cpp | 3 ++- 3 files changed, 11 insertions(+), 1 deletion(-) diff --git a/src/interface/metadata.cpp b/src/interface/metadata.cpp index fce88334c652..e5bac4f9365a 100644 --- a/src/interface/metadata.cpp +++ b/src/interface/metadata.cpp @@ -263,6 +263,13 @@ bool Metadata::IsValid(bool throw_on_fail) const { } } + // Associated fluxes + if (IsSet(FluxNotOneCopy)) { + PARTHENON_REQUIRE( + IsSet(WithFluxes), + "Asking for non-OneCopy associated fluxes without asking for associated fluxes."); + } + return valid; } diff --git a/src/interface/metadata.hpp b/src/interface/metadata.hpp index 5770323c21ba..f64901c71b63 100644 --- a/src/interface/metadata.hpp +++ b/src/interface/metadata.hpp @@ -120,6 +120,8 @@ PARTHENON_INTERNAL_FOR_FLAG(Fine) \ /** this variable is the flux for another variable **/ \ PARTHENON_INTERNAL_FOR_FLAG(Flux) \ + /** allocate a separate flux array for each stage if WithFluxes is specified**/ \ + PARTHENON_INTERNAL_FOR_FLAG(FluxNotOneCopy) \ /************************************************/ \ /** Vars specifying coordinates for visualization purposes **/ \ /** You can specify a single 3D var **/ \ diff --git a/src/interface/state_descriptor.cpp b/src/interface/state_descriptor.cpp index aa3fa5e6af27..84619d78d28d 100644 --- a/src/interface/state_descriptor.cpp +++ b/src/interface/state_descriptor.cpp @@ -274,7 +274,8 @@ bool StateDescriptor::AddFieldImpl(const VarID &vid, const Metadata &m_in, return false; // this field has already been added } else { if (m.IsSet(Metadata::WithFluxes) && m.GetFluxName() == "") { - std::vector mFlags = {Metadata::OneCopy, Metadata::Flux}; + std::vector mFlags = {Metadata::Flux}; + if (!m.IsSet(Metadata::FluxNotOneCopy)) mFlags.push_back(Metadata::OneCopy); 
if (m.IsSet(Metadata::Sparse)) mFlags.push_back(Metadata::Sparse); if (m.IsSet(Metadata::Fine)) mFlags.push_back(Metadata::Fine); if (m.IsSet(Metadata::Cell)) From fb8024787c4711c1b275e326c8ebec94b5a4934a Mon Sep 17 00:00:00 2001 From: Luke Roberts Date: Mon, 26 Aug 2024 13:51:31 -0600 Subject: [PATCH 02/37] changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4f474633ce8e..072f1d14cecf 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,7 @@ ## Current develop ### Added (new features/APIs/variables/...) +- [[PR 1161]]((https://github.com/parthenon-hpc-lab/parthenon/pull/1161)) Add Metadata::FluxNotOneCopy - [[PR 1151]]((https://github.com/parthenon-hpc-lab/parthenon/pull/1151)) Add time offset `c` to LowStorageIntegrator - [[PR 1147]](https://github.com/parthenon-hpc-lab/parthenon/pull/1147) Add `par_reduce_inner` functions - [[PR 1159]](https://github.com/parthenon-hpc-lab/parthenon/pull/1159) Add additional timestep controllers in parthenon/time. 
From 5c9f88c1db056fbc63ae896a603b3266f72d8151 Mon Sep 17 00:00:00 2001 From: Luke Roberts Date: Wed, 28 Aug 2024 19:23:46 -0600 Subject: [PATCH 03/37] Add CellMemAligned flag --- src/interface/metadata.cpp | 17 ++++++++++------- src/interface/state_descriptor.cpp | 9 +++++---- 2 files changed, 15 insertions(+), 11 deletions(-) diff --git a/src/interface/metadata.cpp b/src/interface/metadata.cpp index e5bac4f9365a..52e73798e177 100644 --- a/src/interface/metadata.cpp +++ b/src/interface/metadata.cpp @@ -251,6 +251,13 @@ bool Metadata::IsValid(bool throw_on_fail) const { PARTHENON_THROW("Either the Independent or Derived flag must be set"); } } + + if (IsSet(FillGhost) && IsSet(CellMemAligned) && (!IsSet(Cell))) { + valid = false; + if (throw_on_fail) { + PARTHENON_THROW("Cannot communicate ghosts of non-cell fields that have cell aligned memory."); + } + } // Prolongation/restriction if (HasRefinementOps()) { @@ -313,19 +320,15 @@ Metadata::GetArrayDims(std::weak_ptr wpmb, bool coarse) const { arrDims[i + 3] = 1; if (IsSet(Cell)) { arrDims[MAX_VARIABLE_DIMENSION - 1] = 1; // Only one cell center per cell - } else if (IsSet(Face) && IsSet(Flux)) { - // 3 directions but keep the same ijk shape as cell var for performance - arrDims[MAX_VARIABLE_DIMENSION - 1] = 3; } else if (IsSet(Face) || IsSet(Edge)) { arrDims[MAX_VARIABLE_DIMENSION - 1] = 3; // Three faces and edges per cell - arrDims[0]++; - if (arrDims[1] > 1) arrDims[1]++; - if (arrDims[2] > 1) arrDims[2]++; } else if (IsSet(Node)) { arrDims[MAX_VARIABLE_DIMENSION - 1] = 1; // Only one lower left node per cell + } + if (!IsSet(CellMemAligned) && !IsSet(Cell)) { arrDims[0]++; if (arrDims[1] > 1) arrDims[1]++; - if (arrDims[2] > 1) arrDims[2]++; + if (arrDims[2] > 1) arrDims[2]++; } } else if (IsSet(Particle)) { assert(N >= 0 && N <= MAX_VARIABLE_DIMENSION - 1); diff --git a/src/interface/state_descriptor.cpp b/src/interface/state_descriptor.cpp index 84619d78d28d..8b82bf55b4a4 100644 --- 
a/src/interface/state_descriptor.cpp +++ b/src/interface/state_descriptor.cpp @@ -278,13 +278,14 @@ bool StateDescriptor::AddFieldImpl(const VarID &vid, const Metadata &m_in, if (!m.IsSet(Metadata::FluxNotOneCopy)) mFlags.push_back(Metadata::OneCopy); if (m.IsSet(Metadata::Sparse)) mFlags.push_back(Metadata::Sparse); if (m.IsSet(Metadata::Fine)) mFlags.push_back(Metadata::Fine); - if (m.IsSet(Metadata::Cell)) + if (m.IsSet(Metadata::Cell)) { mFlags.push_back(Metadata::Face); - else if (m.IsSet(Metadata::Face)) + mFlags.push_back(Metadata::CellMemAligned); + } else if (m.IsSet(Metadata::Face)) { mFlags.push_back(Metadata::Edge); - else if (m.IsSet(Metadata::Edge)) + } else if (m.IsSet(Metadata::Edge)) { mFlags.push_back(Metadata::Node); - + } Metadata mf; if (m.GetRefinementFunctions().label().size() > 0) { // Propagate custom refinement ops to flux field From c664973263804c1fdf96f02380ee30574ff9ceb0 Mon Sep 17 00:00:00 2001 From: Luke Roberts Date: Wed, 28 Aug 2024 19:25:37 -0600 Subject: [PATCH 04/37] Only allocate after all received --- src/bvals/comms/boundary_communication.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/bvals/comms/boundary_communication.cpp b/src/bvals/comms/boundary_communication.cpp index 1f238b1fdbf7..78121cd3fac9 100644 --- a/src/bvals/comms/boundary_communication.cpp +++ b/src/bvals/comms/boundary_communication.cpp @@ -213,7 +213,7 @@ TaskStatus ReceiveBoundBufs(std::shared_ptr> &md) { [&all_received](auto pbuf) { all_received = pbuf->TryReceive() && all_received; }); int ibound = 0; - if (Globals::sparse_config.enabled) { + if (Globals::sparse_config.enabled && all_received) { ForEachBoundary( md, [&](auto pmb, sp_mbd_t rc, nb_t &nb, const sp_cv_t v) { const std::size_t ibuf = cache.idx_vec[ibound]; From 7ee64750be4dd12ae95a5c605fde323c5b91cc9e Mon Sep 17 00:00:00 2001 From: Luke Roberts Date: Wed, 28 Aug 2024 19:26:16 -0600 Subject: [PATCH 05/37] Maybe speed up allocation status check --- 
src/interface/meshblock_data.hpp | 3 +++ src/interface/sparse_pack_base.cpp | 14 +++++--------- src/interface/sparse_pack_base.hpp | 14 ++++++++++++-- 3 files changed, 20 insertions(+), 11 deletions(-) diff --git a/src/interface/meshblock_data.hpp b/src/interface/meshblock_data.hpp index c7133317bd76..a0fb115b1189 100644 --- a/src/interface/meshblock_data.hpp +++ b/src/interface/meshblock_data.hpp @@ -76,6 +76,9 @@ class MeshBlockData { MeshBlock *GetParentPointer() const { return GetBlockPointer(); } void SetAllowedDt(const Real dt) const { GetBlockPointer()->SetAllowedDt(dt); } Mesh *GetMeshPointer() const { return GetBlockPointer()->pmy_mesh; } + + // This mirrors a MeshBlockData routine + int NumBlocks() const { return 1;} template IndexRange GetBoundsI(Ts &&...args) const { diff --git a/src/interface/sparse_pack_base.cpp b/src/interface/sparse_pack_base.cpp index d5a66db01498..d85deced41c9 100644 --- a/src/interface/sparse_pack_base.cpp +++ b/src/interface/sparse_pack_base.cpp @@ -67,19 +67,15 @@ SparsePackBase::GetAllocStatus(T *pmd, const PackDescriptor &desc, const std::vector &include_block) { using mbd_t = MeshBlockData; - int nvar = desc.nvar_groups; - - std::vector astat; + const int nvar = desc.nvar_groups; + const int nblock = pmd->NumBlocks(); + std::vector astat(nblock * desc.nvar_tot); + int idx = 0; ForEachBlock(pmd, include_block, [&](int b, mbd_t *pmbd) { const auto &uid_map = pmbd->GetUidMap(); for (int i = 0; i < nvar; ++i) { for (const auto &[var_name, uid] : desc.var_groups[i]) { - if (uid_map.count(uid) > 0) { - const auto pv = uid_map.at(uid); - astat.push_back(pv->GetAllocationStatus()); - } else { - astat.push_back(-1); - } + astat[idx++] = uid_map.count(uid) > 0 ? 
(uid_map.at(uid))->GetAllocationStatus() : -1; } } }); diff --git a/src/interface/sparse_pack_base.hpp b/src/interface/sparse_pack_base.hpp index 53fc4e37d0b1..7e0e524c4db5 100644 --- a/src/interface/sparse_pack_base.hpp +++ b/src/interface/sparse_pack_base.hpp @@ -138,7 +138,7 @@ struct PackDescriptor { // default constructor needed for certain use cases PackDescriptor() : nvar_groups(0), var_group_names({}), var_groups({}), with_fluxes(false), - coarse(false), flat(false), identifier("") {} + coarse(false), flat(false), identifier(""), nvar_tot(0) {} template PackDescriptor(StateDescriptor *psd, const std::vector &var_groups_in, @@ -147,7 +147,7 @@ struct PackDescriptor { var_groups(BuildUids(var_groups_in.size(), psd, selector)), with_fluxes(options.count(PDOpt::WithFluxes)), coarse(options.count(PDOpt::Coarse)), flat(options.count(PDOpt::Flatten)), - identifier(GetIdentifier()) { + identifier(GetIdentifier()), nvar_tot(GetNVarsTotal(var_groups)) { PARTHENON_REQUIRE(!(with_fluxes && coarse), "Probably shouldn't be making a coarse pack with fine fluxes."); } @@ -159,8 +159,18 @@ struct PackDescriptor { const bool coarse; const bool flat; const std::string identifier; + const std::size_t nvar_tot; private: + static int GetNVarsTotal(const std::vector &var_groups) { + int nvar_tot = 0; + for (const auto &group : var_groups) { + for (const auto &[a, b] : group) { + nvar_tot++; + } + } + return nvar_tot; + } std::string GetIdentifier() { std::string ident(""); for (const auto &vgroup : var_groups) { From f31cce4d261cbb326a7031a00186494b0c94f489 Mon Sep 17 00:00:00 2001 From: Luke Roberts Date: Wed, 28 Aug 2024 19:26:41 -0600 Subject: [PATCH 06/37] Actually include the flag --- src/interface/metadata.hpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/interface/metadata.hpp b/src/interface/metadata.hpp index f64901c71b63..a6008625072d 100644 --- a/src/interface/metadata.hpp +++ b/src/interface/metadata.hpp @@ -122,6 +122,9 @@ PARTHENON_INTERNAL_FOR_FLAG(Flux) 
\ /** allocate a separate flux array for each stage if WithFluxes is specified**/ \ PARTHENON_INTERNAL_FOR_FLAG(FluxNotOneCopy) \ + /** Align memory of fields to cell centered memory \ + (Field will be missing one layer of ghosts if it is not cell centered) **/ \ + PARTHENON_INTERNAL_FOR_FLAG(CellMemAligned) \ /************************************************/ \ /** Vars specifying coordinates for visualization purposes **/ \ /** You can specify a single 3D var **/ \ From b2d75253575774ace1eefe480eb46d8fa5b61210 Mon Sep 17 00:00:00 2001 From: Philipp Grete Date: Wed, 4 Sep 2024 11:12:10 +0200 Subject: [PATCH 07/37] Bump OPMD version and add delim --- CMakeLists.txt | 2 +- src/outputs/parthenon_opmd.cpp | 3 ++- src/outputs/parthenon_opmd.hpp | 7 +++++++ src/outputs/restart_opmd.cpp | 11 ++++++++++- 4 files changed, 20 insertions(+), 3 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index eda4bee63126..5c4435b04c5a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -205,7 +205,7 @@ if (PARTHENON_ENABLE_OPENPMD) FetchContent_Declare(openPMD GIT_REPOSITORY "https://github.com/openPMD/openPMD-api.git" # we need newer than the latest 0.15.2 release to support writing attriutes from a subset of ranks - GIT_TAG "bda3544") # develop as of 2024-07-12 + GIT_TAG "1c7d7ff") # develop as of 2024-09-02 FetchContent_MakeAvailable(openPMD) install(TARGETS openPMD EXPORT parthenonTargets) endif() diff --git a/src/outputs/parthenon_opmd.cpp b/src/outputs/parthenon_opmd.cpp index 8e1268ef4413..2526728b9c8d 100644 --- a/src/outputs/parthenon_opmd.cpp +++ b/src/outputs/parthenon_opmd.cpp @@ -65,7 +65,8 @@ using namespace OutputUtils; template void WriteAllParamsOfType(std::shared_ptr pkg, openPMD::Iteration *it) { - const std::string prefix = "Params/" + pkg->label() + "/"; + using OpenPMDUtils::delim; + const std::string prefix = "Params" + delim + pkg->label() + delim; const auto ¶ms = pkg->AllParams(); for (const auto &key : params.GetKeys()) { const auto type = 
params.GetType(key); diff --git a/src/outputs/parthenon_opmd.hpp b/src/outputs/parthenon_opmd.hpp index 94a9161ec424..2c7035dd13e9 100644 --- a/src/outputs/parthenon_opmd.hpp +++ b/src/outputs/parthenon_opmd.hpp @@ -20,6 +20,13 @@ namespace parthenon { namespace OpenPMDUtils { +// Deliminter to separate packages and parameters in attributes. +// More or less a workaround as the OpenPMD API does currently not expose +// access to non-standard groups (such as "Params" versus the standard "meshes"). +// TODO(pgrete & reviewer) (agree on delim and add check for package name and keys) OR +// better use of opmd-api +inline static const std::string delim = "+"; + // Construct OpenPMD Mesh "record" name and comonnent identifier. // - comp_idx is a flattended index over all components of the vectors and tensors, i.e., // the typical v,u,t indices. diff --git a/src/outputs/restart_opmd.cpp b/src/outputs/restart_opmd.cpp index b6767eaa3714..8fd939f2ae84 100644 --- a/src/outputs/restart_opmd.cpp +++ b/src/outputs/restart_opmd.cpp @@ -116,11 +116,20 @@ std::size_t RestartReaderOPMD::GetSwarmCounts(const std::string &swarm, template void RestartReaderOPMD::ReadAllParamsOfType(const std::string &pkg_name, Params ¶ms) { + using OpenPMDUtils::delim; for (const auto &key : params.GetKeys()) { const auto type = params.GetType(key); auto mutability = params.GetMutability(key); if (type == std::type_index(typeid(T)) && mutability == Params::Mutability::Restart) { - auto val = it->getAttribute("Params/" + pkg_name + "/" + key).get(); + auto attrs = it->attributes(); + for (const auto & attr : attrs) { + std::cout << "Contains attribute: " << attr << std::endl; + } + std::cout << "Reading '" + << "Params" + delim + pkg_name + delim + key << "' with type: " << typeid(T).name() + << std::endl; + + auto val = it->getAttribute("Params" + delim + pkg_name + delim + key).get(); params.Update(key, val); } } From 8e8f201606c4a4fcfdc80ff61e2bc7147fb404f7 Mon Sep 17 00:00:00 2001 From: Luke 
Roberts Date: Wed, 4 Sep 2024 11:39:39 -0600 Subject: [PATCH 08/37] format and lint --- src/interface/meshblock_data.hpp | 4 ++-- src/interface/metadata.cpp | 11 ++++++----- src/interface/metadata.hpp | 1 - src/interface/sparse_pack_base.cpp | 3 ++- src/interface/sparse_pack_base.hpp | 8 ++++---- 5 files changed, 14 insertions(+), 13 deletions(-) diff --git a/src/interface/meshblock_data.hpp b/src/interface/meshblock_data.hpp index a0fb115b1189..9fab1f354674 100644 --- a/src/interface/meshblock_data.hpp +++ b/src/interface/meshblock_data.hpp @@ -76,9 +76,9 @@ class MeshBlockData { MeshBlock *GetParentPointer() const { return GetBlockPointer(); } void SetAllowedDt(const Real dt) const { GetBlockPointer()->SetAllowedDt(dt); } Mesh *GetMeshPointer() const { return GetBlockPointer()->pmy_mesh; } - + // This mirrors a MeshBlockData routine - int NumBlocks() const { return 1;} + int NumBlocks() const { return 1; } template IndexRange GetBoundsI(Ts &&...args) const { diff --git a/src/interface/metadata.cpp b/src/interface/metadata.cpp index 52e73798e177..244a1860363e 100644 --- a/src/interface/metadata.cpp +++ b/src/interface/metadata.cpp @@ -251,12 +251,13 @@ bool Metadata::IsValid(bool throw_on_fail) const { PARTHENON_THROW("Either the Independent or Derived flag must be set"); } } - + if (IsSet(FillGhost) && IsSet(CellMemAligned) && (!IsSet(Cell))) { valid = false; if (throw_on_fail) { - PARTHENON_THROW("Cannot communicate ghosts of non-cell fields that have cell aligned memory."); - } + PARTHENON_THROW( + "Cannot communicate ghosts of non-cell fields that have cell aligned memory."); + } } // Prolongation/restriction @@ -325,10 +326,10 @@ Metadata::GetArrayDims(std::weak_ptr wpmb, bool coarse) const { } else if (IsSet(Node)) { arrDims[MAX_VARIABLE_DIMENSION - 1] = 1; // Only one lower left node per cell } - if (!IsSet(CellMemAligned) && !IsSet(Cell)) { + if (!IsSet(CellMemAligned) && !IsSet(Cell)) { arrDims[0]++; if (arrDims[1] > 1) arrDims[1]++; - if (arrDims[2] > 1) 
arrDims[2]++; + if (arrDims[2] > 1) arrDims[2]++; } } else if (IsSet(Particle)) { assert(N >= 0 && N <= MAX_VARIABLE_DIMENSION - 1); diff --git a/src/interface/metadata.hpp b/src/interface/metadata.hpp index a6008625072d..ca8afeddbe54 100644 --- a/src/interface/metadata.hpp +++ b/src/interface/metadata.hpp @@ -693,7 +693,6 @@ Set_t GetByFlag(const Metadata::FlagCollection &flags, NameMap_t &nameMap, return out; } } // namespace MetadataUtils - } // namespace parthenon #endif // INTERFACE_METADATA_HPP_ diff --git a/src/interface/sparse_pack_base.cpp b/src/interface/sparse_pack_base.cpp index d85deced41c9..d4ead84113c8 100644 --- a/src/interface/sparse_pack_base.cpp +++ b/src/interface/sparse_pack_base.cpp @@ -75,7 +75,8 @@ SparsePackBase::GetAllocStatus(T *pmd, const PackDescriptor &desc, const auto &uid_map = pmbd->GetUidMap(); for (int i = 0; i < nvar; ++i) { for (const auto &[var_name, uid] : desc.var_groups[i]) { - astat[idx++] = uid_map.count(uid) > 0 ? (uid_map.at(uid))->GetAllocationStatus() : -1; + astat[idx++] = + uid_map.count(uid) > 0 ? 
(uid_map.at(uid))->GetAllocationStatus() : -1; } } }); diff --git a/src/interface/sparse_pack_base.hpp b/src/interface/sparse_pack_base.hpp index 7e0e524c4db5..4b01547d7b04 100644 --- a/src/interface/sparse_pack_base.hpp +++ b/src/interface/sparse_pack_base.hpp @@ -162,10 +162,10 @@ struct PackDescriptor { const std::size_t nvar_tot; private: - static int GetNVarsTotal(const std::vector &var_groups) { - int nvar_tot = 0; - for (const auto &group : var_groups) { - for (const auto &[a, b] : group) { + static int GetNVarsTotal(const std::vector &var_groups) { + int nvar_tot = 0; + for (const auto &group : var_groups) { + for (const auto &[a, b] : group) { nvar_tot++; } } From 3ff2e8a95f820ffd5026dfc07acd311ed8100430 Mon Sep 17 00:00:00 2001 From: Luke Roberts Date: Wed, 4 Sep 2024 14:01:06 -0600 Subject: [PATCH 09/37] Make flux metadata accessible at WithFlux variable creation point --- src/interface/metadata.cpp | 47 ++++++++++++++++++----- src/interface/metadata.hpp | 30 ++++++++++++--- src/interface/sparse_pool.cpp | 60 +++++++++++++++++++----------- src/interface/state_descriptor.cpp | 22 +---------- 4 files changed, 101 insertions(+), 58 deletions(-) diff --git a/src/interface/metadata.cpp b/src/interface/metadata.cpp index 244a1860363e..36f37adf8b1c 100644 --- a/src/interface/metadata.cpp +++ b/src/interface/metadata.cpp @@ -101,10 +101,13 @@ MetadataFlag Metadata::GetUserFlag(const std::string &flagname) { } namespace parthenon { -Metadata::Metadata(const std::vector &bits, const std::vector &shape, +Metadata::Metadata(const std::vector &bits, + const std::vector &flux_bits, + const std::vector &shape, const std::vector &component_labels, const std::string &associated, - const refinement::RefinementFunctions_t ref_funcs_) + const refinement::RefinementFunctions_t ref_funcs_, + const refinement::RefinementFunctions_t flux_ref_funcs_) : shape_(shape), component_labels_(component_labels), associated_(associated) { // set flags for (const auto f : bits) { @@ -164,6 
+167,39 @@ Metadata::Metadata(const std::vector &bits, const std::vector deallocation_threshold_ = 0.0; default_value_ = 0.0; } + + // Now create the flux metadata if required + if (IsSet(WithFluxes)) { + std::set flux_flags; + for (const auto f : flux_bits) + flux_flags.insert(f); + + // Set some standard defaults for the flux metadata if no + // flags were provided + if (flux_flags.size() == 0) { + flux_flags.insert(OneCopy); + if (IsSet(Fine)) flux_flags.insert(Fine); + if (IsSet(Cell)) flux_flags.insert(CellMemAligned); + if (IsSet(Sparse)) flux_flags.insert(Sparse); + } + + // These flags are automatically propagated for fluxes + flux_flags.insert(Flux); + if (IsSet(Cell)) { + flux_flags.insert(Face); + } else if (IsSet(Face)) { + flux_flags.insert(Edge); + } else if (IsSet(Edge)) { + flux_flags.insert(Node); + } + + if (IsSet(Tensor)) flux_flags.insert(Tensor); + if (IsSet(Vector)) flux_flags.insert(Vector); + + std::vector flux_flags_vec(flux_flags.begin(), flux_flags.end()); + flux_metadata = std::make_shared(flux_flags_vec, shape, component_labels, + std::string(), flux_ref_funcs_); + } } std::ostream &operator<<(std::ostream &os, const parthenon::Metadata &m) { @@ -271,13 +307,6 @@ bool Metadata::IsValid(bool throw_on_fail) const { } } - // Associated fluxes - if (IsSet(FluxNotOneCopy)) { - PARTHENON_REQUIRE( - IsSet(WithFluxes), - "Asking for non-OneCopy associated fluxes without asking for associated fluxes."); - } - return valid; } diff --git a/src/interface/metadata.hpp b/src/interface/metadata.hpp index ca8afeddbe54..66d8e6496dc4 100644 --- a/src/interface/metadata.hpp +++ b/src/interface/metadata.hpp @@ -120,8 +120,6 @@ PARTHENON_INTERNAL_FOR_FLAG(Fine) \ /** this variable is the flux for another variable **/ \ PARTHENON_INTERNAL_FOR_FLAG(Flux) \ - /** allocate a separate flux array for each stage if WithFluxes is specified**/ \ - PARTHENON_INTERNAL_FOR_FLAG(FluxNotOneCopy) \ /** Align memory of fields to cell centered memory \ (Field will be 
missing one layer of ghosts if it is not cell centered) **/ \ PARTHENON_INTERNAL_FOR_FLAG(CellMemAligned) \ @@ -330,28 +328,48 @@ class Metadata { // 4 constructors, this is the general constructor called by all other constructors, so // we do some sanity checks here Metadata( - const std::vector &bits, const std::vector &shape = {}, + const std::vector &bits, const std::vector &flux_bits, + const std::vector &shape = {}, const std::vector &component_labels = {}, const std::string &associated = "", const refinement::RefinementFunctions_t ref_funcs_ = + refinement::RefinementFunctions_t::RegisterOps< + refinement_ops::ProlongateSharedMinMod, refinement_ops::RestrictAverage>(), + const refinement::RefinementFunctions_t flux_ref_funcs_ = refinement::RefinementFunctions_t::RegisterOps< refinement_ops::ProlongateSharedMinMod, refinement_ops::RestrictAverage>()); - // 1 constructor + Metadata( + const std::vector &bits, const std::vector &shape = {}, + const std::vector &component_labels = {}, + const std::string &associated = "", + const refinement::RefinementFunctions_t ref_funcs_ = + refinement::RefinementFunctions_t::RegisterOps< + refinement_ops::ProlongateSharedMinMod, refinement_ops::RestrictAverage>()) + : Metadata(bits, {}, shape, component_labels, associated, ref_funcs_, ref_funcs_) {} + Metadata(const std::vector &bits, const std::vector &shape, const std::string &associated) : Metadata(bits, shape, {}, associated) {} - // 2 constructors Metadata(const std::vector &bits, const std::vector component_labels, const std::string &associated = "") : Metadata(bits, {1}, component_labels, associated) {} - // 1 constructor Metadata(const std::vector &bits, const std::string &associated) : Metadata(bits, {1}, {}, associated) {} + std::shared_ptr GetSPtrFluxMetadata() { + PARTHENON_REQUIRE(IsSet(WithFluxes), + "Asking for flux metadata from metadata that doesn't have it."); + return flux_metadata; + } + + private: + std::shared_ptr flux_metadata; + + public: // Static 
routines static MetadataFlag AddUserFlag(const std::string &name); static bool FlagNameExists(const std::string &flagname); diff --git a/src/interface/sparse_pool.cpp b/src/interface/sparse_pool.cpp index 8a6e0ef213ca..7890cff964de 100644 --- a/src/interface/sparse_pool.cpp +++ b/src/interface/sparse_pool.cpp @@ -11,6 +11,8 @@ // the public, perform publicly and display publicly, and to permit others to do so. //======================================================================================== +#include + #include "interface/sparse_pool.hpp" #include "interface/metadata.hpp" @@ -48,34 +50,31 @@ SparsePool::SparsePool(const std::string &base_name, const Metadata &metadata, } } -const Metadata &SparsePool::AddImpl(int sparse_id, const std::vector &shape, - const MetadataFlag *vector_tensor, - const std::vector &component_labels) { - PARTHENON_REQUIRE_THROWS(sparse_id != InvalidSparseID, - "Tried to add InvalidSparseID to sparse pool " + base_name_); - +std::shared_ptr +MakeSparseVarMetadataImpl(Metadata *in, const std::vector &shape, + const MetadataFlag *vector_tensor, + const std::vector &component_labels) { // copy shared metadata - Metadata this_metadata( - shared_metadata_.Flags(), shape.size() > 0 ? shape : shared_metadata_.Shape(), - component_labels.size() > 0 ? component_labels - : shared_metadata_.getComponentLabels(), - shared_metadata_.getAssociated(), shared_metadata_.GetRefinementFunctions()); + auto this_metadata = std::make_shared( + in->Flags(), shape.size() > 0 ? shape : in->Shape(), + component_labels.size() > 0 ? 
component_labels : in->getComponentLabels(), + in->getAssociated(), in->GetRefinementFunctions()); - this_metadata.SetSparseThresholds(shared_metadata_.GetAllocationThreshold(), - shared_metadata_.GetDeallocationThreshold(), - shared_metadata_.GetDefaultValue()); + this_metadata->SetSparseThresholds(in->GetAllocationThreshold(), + in->GetDeallocationThreshold(), + in->GetDefaultValue()); // if vector_tensor is set, apply it if (vector_tensor != nullptr) { if (*vector_tensor == Metadata::Vector) { - this_metadata.Unset(Metadata::Tensor); - this_metadata.Set(Metadata::Vector); + this_metadata->Unset(Metadata::Tensor); + this_metadata->Set(Metadata::Vector); } else if (*vector_tensor == Metadata::Tensor) { - this_metadata.Unset(Metadata::Vector); - this_metadata.Set(Metadata::Tensor); + this_metadata->Unset(Metadata::Vector); + this_metadata->Set(Metadata::Tensor); } else if (*vector_tensor == Metadata::None) { - this_metadata.Unset(Metadata::Vector); - this_metadata.Unset(Metadata::Tensor); + this_metadata->Unset(Metadata::Vector); + this_metadata->Unset(Metadata::Tensor); } else { PARTHENON_THROW("Expected MetadataFlag Vector, Tensor, or None, but got " + vector_tensor->Name()); @@ -83,9 +82,26 @@ const Metadata &SparsePool::AddImpl(int sparse_id, const std::vector &shape } // just in case - this_metadata.IsValid(true); + this_metadata->IsValid(true); + + return this_metadata; +} + +const Metadata &SparsePool::AddImpl(int sparse_id, const std::vector &shape, + const MetadataFlag *vector_tensor, + const std::vector &component_labels) { + PARTHENON_REQUIRE_THROWS(sparse_id != InvalidSparseID, + "Tried to add InvalidSparseID to sparse pool " + base_name_); + + auto this_metadata = MakeSparseVarMetadataImpl(&shared_metadata_, shape, vector_tensor, + component_labels); + if (this_metadata->IsSet(Metadata::WithFluxes)) { + this_metadata->GetSPtrFluxMetadata() = + MakeSparseVarMetadataImpl(shared_metadata_.GetSPtrFluxMetadata().get(), shape, + vector_tensor, 
component_labels); + } - const auto ins = pool_.insert({sparse_id, this_metadata}); + const auto ins = pool_.insert({sparse_id, *this_metadata}); PARTHENON_REQUIRE_THROWS(ins.second, "Tried to add sparse ID " + std::to_string(sparse_id) + " to sparse pool '" + base_name_ + diff --git a/src/interface/state_descriptor.cpp b/src/interface/state_descriptor.cpp index 4ba65755fcb4..b468b0201091 100644 --- a/src/interface/state_descriptor.cpp +++ b/src/interface/state_descriptor.cpp @@ -274,29 +274,9 @@ bool StateDescriptor::AddFieldImpl(const VarID &vid, const Metadata &m_in, return false; // this field has already been added } else { if (m.IsSet(Metadata::WithFluxes) && m.GetFluxName() == "") { - std::vector mFlags = {Metadata::Flux}; - if (!m.IsSet(Metadata::FluxNotOneCopy)) mFlags.push_back(Metadata::OneCopy); - if (m.IsSet(Metadata::Sparse)) mFlags.push_back(Metadata::Sparse); - if (m.IsSet(Metadata::Fine)) mFlags.push_back(Metadata::Fine); - if (m.IsSet(Metadata::Cell)) { - mFlags.push_back(Metadata::Face); - mFlags.push_back(Metadata::CellMemAligned); - } else if (m.IsSet(Metadata::Face)) { - mFlags.push_back(Metadata::Edge); - } else if (m.IsSet(Metadata::Edge)) { - mFlags.push_back(Metadata::Node); - } - Metadata mf; - if (m.GetRefinementFunctions().label().size() > 0) { - // Propagate custom refinement ops to flux field - mf = Metadata(mFlags, m.Shape(), std::vector(), std::string(), - m.GetRefinementFunctions()); - } else { - mf = Metadata(mFlags, m.Shape()); - } auto fId = VarID{internal_fluxname + internal_varname_seperator + vid.base_name, vid.sparse_id}; - AddFieldImpl(fId, mf, control_vid); + AddFieldImpl(fId, *(m.GetSPtrFluxMetadata()), control_vid); m.SetFluxName(fId.label()); } metadataMap_.insert({vid, m}); From 2977980e7f78c75958bfa2615025f591d3cf38d8 Mon Sep 17 00:00:00 2001 From: Luke Roberts Date: Wed, 4 Sep 2024 14:51:59 -0600 Subject: [PATCH 10/37] Add some documentation --- doc/sphinx/src/interface/metadata.rst | 15 ++++++++++++--- 1 file 
changed, 12 insertions(+), 3 deletions(-) diff --git a/doc/sphinx/src/interface/metadata.rst b/doc/sphinx/src/interface/metadata.rst index 4f08b7ede310..d87f4f8f92ec 100644 --- a/doc/sphinx/src/interface/metadata.rst +++ b/doc/sphinx/src/interface/metadata.rst @@ -158,9 +158,18 @@ classes may be allocated. The behaviours are the following: sense (e.g. if the ``WithFluxes`` variable has ``Metadata::Cell`` set the new variable will have ``Metadata::Face``) will be created in the package with the name ``bnd_flux::name_of_original_variable`` and - ``Metadata::Flux`` and ``Metadata::OneCopy``. When creating packs that - include fluxes, the new flux field will be included in the flux portion - of the pack if the parent field is in the pack. + ``Metadata::Flux`` and ``Metadata::OneCopy``. Additionally, the flags + ``Metadata::Sparse``, ``Metadata::Vector``, and ``Metadata::Tensor`` + will propagate to the flux ``Metadata`` if they are set in the base field + ``Metadata``. By default, a flux field for a cell-centered field is + built with ``Metadata::CellMemAligned`` flag set for backwards + compatibility. A shared pointer to the ``Metadata`` object for the flux + field can be accessed from the base ``Metadata`` with the method + ``Metadata::GetSPtrFluxMetadata()``. This can be used to set flags other + than the defaults or set custom prolongation/restriction operations for + the fluxes. When creating packs that include fluxes, the new flux field + will be included in the flux portion of the pack if the parent field is + in the pack. - If ``Metadata::Flux`` is set, this field is exchanged on shared elements across fine-coarse boundaries when the flux correction tasks are called. 
From 917bf351b14d54e218c441f81121237079c65148 Mon Sep 17 00:00:00 2001 From: Luke Roberts Date: Wed, 4 Sep 2024 15:20:32 -0600 Subject: [PATCH 11/37] update changelog --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d889eef2a860..a027570334b2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,7 +3,7 @@ ## Current develop ### Added (new features/APIs/variables/...) -- [[PR 1161]](https://github.com/parthenon-hpc-lab/parthenon/pull/1161) Add Metadata::FluxNotOneCopy +- [[PR 1161]](https://github.com/parthenon-hpc-lab/parthenon/pull/1161) Make flux field Metadata accessible, add Metadata::CellMemAligned flag, small performance upgrades ### Changed (changing behavior/API/variables/...) From d5c653dd318038c691e82569e563b6449aeaf387 Mon Sep 17 00:00:00 2001 From: Luke Roberts Date: Wed, 4 Sep 2024 15:44:47 -0600 Subject: [PATCH 12/37] ensure backward compatibility --- doc/sphinx/src/interface/metadata.rst | 10 +++++++--- src/interface/metadata.hpp | 6 ++++++ 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/doc/sphinx/src/interface/metadata.rst b/doc/sphinx/src/interface/metadata.rst index d87f4f8f92ec..0ccfe73fe4a2 100644 --- a/doc/sphinx/src/interface/metadata.rst +++ b/doc/sphinx/src/interface/metadata.rst @@ -167,9 +167,13 @@ classes may be allocated. The behaviours are the following: field can be accessed from the base ``Metadata`` with the method ``Metadata::GetSPtrFluxMetadata()``. This can be used to set flags other than the defaults or set custom prolongation/restriction operations for - the fluxes. When creating packs that include fluxes, the new flux field - will be included in the flux portion of the pack if the parent field is - in the pack. + the fluxes. Note that calling ``Metadata::RegisterRefinementOps<...>()`` + on the base field propagates the registered refinement operations through + to the flux ``Metadata`` for backward compatibility. 
If separate operations + are desired for the fluxes, the ordering of calls to `RegisterRefinementOps` + on the base field and the flux field matters. When creating packs that + include fluxes, the new flux field will be included in the flux portion of + the pack if the parent field is in the pack. - If ``Metadata::Flux`` is set, this field is exchanged on shared elements across fine-coarse boundaries when the flux correction tasks are called. diff --git a/src/interface/metadata.hpp b/src/interface/metadata.hpp index 66d8e6496dc4..467038040ed7 100644 --- a/src/interface/metadata.hpp +++ b/src/interface/metadata.hpp @@ -559,6 +559,12 @@ class Metadata { refinement_funcs_ = refinement::RefinementFunctions_t::RegisterOps(); + // Propagate refinement operations to flux metadata for backward compatibility + if (IsSet(WithFluxes)) { + flux_metadata->refinement_funcs_ = + refinement::RefinementFunctions_t::RegisterOps(); + } } // Operators From 456445dbc363f17871bb54b87c632421b7e336b7 Mon Sep 17 00:00:00 2001 From: Jonah Miller Date: Tue, 10 Sep 2024 12:40:50 -0600 Subject: [PATCH 13/37] add use_system_packages --- CMakeLists.txt | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index c398d34eae29..abf6c16e4587 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -60,6 +60,17 @@ option(CODE_COVERAGE "Enable code coverage reporting" OFF) option(ENABLE_ASAN "Turn on ASAN" OFF) option(ENABLE_HWASAN "Turn on HWASAN (currently ARM-only)" OFF) +option(PARTHENON_USE_SYSTEM_PACKAGES "Enables search for system packages when available" OFF) +if (PARTHENON_USE_SYSTEM_PACKAGES) + option(PARTHENON_IMPORT_KOKKOS + "If ON, attempt to link to an external Kokkos library. If OFF, build Kokkos from source and package with Parthenon" + ON) +else() + option(PARTHENON_IMPORT_KOKKOS + "If ON, attempt to link to an external Kokkos library. 
If OFF, build Kokkos from source and package with Parthenon" + OFF) +endif() + include(cmake/Format.cmake) include(cmake/Lint.cmake) @@ -204,7 +215,6 @@ endif() set(CMAKE_CXX_STANDARD_REQUIRED ON) set(CMAKE_CXX_STANDARD 17) -option(PARTHENON_IMPORT_KOKKOS "If ON, attempt to link to an external Kokkos library. If OFF, build Kokkos from source and package with Parthenon" OFF) if (NOT TARGET Kokkos::kokkos) if (PARTHENON_IMPORT_KOKKOS) find_package(Kokkos 4) @@ -367,7 +377,11 @@ if (PARTHENON_ENABLE_UNIT_TESTS OR PARTHENON_ENABLE_INTEGRATION_TESTS OR PARTHEN endif() if (PARTHENON_ENABLE_ASCENT) - find_package(Ascent REQUIRED NO_DEFAULT_PATH) + if (PARTHENON_USE_SYSTEM_PACKAGES) + find_package(Ascent REQUIRED) + else() + find_package(Ascent REQUIRED NO_DEFAULT_PATH) + endif() endif() # Installation configuration From ad1e8aec608403b58a9c7c3c5c6692418b3a16ea Mon Sep 17 00:00:00 2001 From: Jonah Miller Date: Tue, 10 Sep 2024 12:43:29 -0600 Subject: [PATCH 14/37] update docs --- doc/sphinx/src/building.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/sphinx/src/building.rst b/doc/sphinx/src/building.rst index 29559f1b1101..b0ce186002b0 100644 --- a/doc/sphinx/src/building.rst +++ b/doc/sphinx/src/building.rst @@ -45,6 +45,7 @@ General list of cmake options: || PARTHENON\_COPYRIGHT\_CHECK\_DEFAULT || OFF || Option || Check copyright as part of the default target (otherwise use the `check-copyright` target) | || CMAKE\_INSTALL\_PREFIX || machine specific || String || Optional path for library installation | || Kokkos\_ROOT || unset || String || Path to a Kokkos source directory (containing CMakeLists.txt) | +|| PARTHENON\_USE\_SYSTEM\_PACKAGES || OFF || Option || If ON, attempt to link to system dependencies for Kokkos and Ascent if possible. If off, will avoid doing so by default. | || PARTHENON\_IMPORT\_KOKKOS || ON/OFF || Option || If ON, attempt to link to an external Kokkos library. 
If OFF, build Kokkos from source and package with Parthenon | || BUILD\_SHARED\_LIBS || OFF || Option || If installing Parthenon, whether to build as shared rather than static | +-------------------------------------------+--------------------------------+---------+--------------------------------------------------------------------------------------------------------------------------------------------------------------+ From 37cb89c14aa1c3017abf111807b2d1962b234dbe Mon Sep 17 00:00:00 2001 From: Jonah Miller Date: Tue, 10 Sep 2024 12:50:26 -0600 Subject: [PATCH 15/37] changelog and CC --- CHANGELOG.md | 4 ++++ CMakeLists.txt | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 62b1ea9c5c78..edf4b18a8011 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,10 @@ ## Current develop +### Added (new features/APIs/variables/...) +- [[PR 1171]](https://github.com/parthenon-hpc-lab/parthenon/pull/1171) Add PARTHENON_USE_SYSTEM_PACKAGES build option + + ### Changed (changing behavior/API/variables/...) diff --git a/CMakeLists.txt b/CMakeLists.txt index abf6c16e4587..24a7a3b2ebfb 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -3,7 +3,7 @@ # Copyright(C) 2020-2024 The Parthenon collaboration # Licensed under the 3-clause BSD License, see LICENSE file for details #========================================================================================= -# (C) (or copyright) 2020-2023. Triad National Security, LLC. All rights reserved. +# (C) (or copyright) 2020-2024. Triad National Security, LLC. All rights reserved. # # This program was produced under U.S. 
Government contract 89233218CNA000001 for Los # Alamos National Laboratory (LANL), which is operated by Triad National Security, LLC From ec124755660466a303ec155d117fb329164bbe86 Mon Sep 17 00:00:00 2001 From: Jonah Miller Date: Tue, 10 Sep 2024 12:58:35 -0600 Subject: [PATCH 16/37] LFR suggestion --- doc/sphinx/src/building.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/sphinx/src/building.rst b/doc/sphinx/src/building.rst index b0ce186002b0..b3a42bd4caec 100644 --- a/doc/sphinx/src/building.rst +++ b/doc/sphinx/src/building.rst @@ -45,8 +45,8 @@ General list of cmake options: || PARTHENON\_COPYRIGHT\_CHECK\_DEFAULT || OFF || Option || Check copyright as part of the default target (otherwise use the `check-copyright` target) | || CMAKE\_INSTALL\_PREFIX || machine specific || String || Optional path for library installation | || Kokkos\_ROOT || unset || String || Path to a Kokkos source directory (containing CMakeLists.txt) | -|| PARTHENON\_USE\_SYSTEM\_PACKAGES || OFF || Option || If ON, attempt to link to system dependencies for Kokkos and Ascent if possible. If off, will avoid doing so by default. | -|| PARTHENON\_IMPORT\_KOKKOS || ON/OFF || Option || If ON, attempt to link to an external Kokkos library. If OFF, build Kokkos from source and package with Parthenon | +|| PARTHENON\_USE\_SYSTEM\_PACKAGES || OFF || Option || If ON, attempt to link to system dependencies for Kokkos and Ascent if possible. If OFF, will avoid doing so by default. | +|| PARTHENON\_IMPORT\_KOKKOS || OFF/ON || Option || If ON, attempt to link to an external Kokkos library. Else build from source. Default is ON if PARTHENON\_USE\_SYSTEM\_PACKAGES and OFF otherwise. 
| || BUILD\_SHARED\_LIBS || OFF || Option || If installing Parthenon, whether to build as shared rather than static | +-------------------------------------------+--------------------------------+---------+--------------------------------------------------------------------------------------------------------------------------------------------------------------+ From ec4bec2074a8b204ce4eb480fb37fedebb64e395 Mon Sep 17 00:00:00 2001 From: Jonah Miller Date: Tue, 10 Sep 2024 14:35:00 -0600 Subject: [PATCH 17/37] make parthenon manager play nicely with other MPI libraries --- src/parthenon_manager.cpp | 24 +++++++++--------------- 1 file changed, 9 insertions(+), 15 deletions(-) diff --git a/src/parthenon_manager.cpp b/src/parthenon_manager.cpp index 58df8de8ee5e..028d9883b1ec 100644 --- a/src/parthenon_manager.cpp +++ b/src/parthenon_manager.cpp @@ -50,26 +50,18 @@ ParthenonStatus ParthenonManager::ParthenonInitEnv(int argc, char *argv[]) { // initialize MPI #ifdef MPI_PARALLEL - if (MPI_SUCCESS != MPI_Init(&argc, &argv)) { - std::cout << "### FATAL ERROR in ParthenonInit" << std::endl + int mpi_initialized; + PARTHENON_MPI_CHECK(MPI_Initialized(&mpi_initialized)); + if (!mpi_initialized && (MPI_SUCCESS != MPI_Init(&argc, &argv))) { + std::cerr << "### FATAL ERROR in ParthenonInit" << std::endl << "MPI Initialization failed." << std::endl; return ParthenonStatus::error; } // Get process id (rank) in MPI_COMM_WORLD - if (MPI_SUCCESS != MPI_Comm_rank(MPI_COMM_WORLD, &(Globals::my_rank))) { - std::cout << "### FATAL ERROR in ParthenonInit" << std::endl - << "MPI_Comm_rank failed." << std::endl; - // MPI_Finalize(); - return ParthenonStatus::error; - } + PARTHENON_MPI_CHECK(MPI_Comm_rank(MPI_COMM_WORLD, &(Globals::my_rank))); // Get total number of MPI processes (ranks) - if (MPI_SUCCESS != MPI_Comm_size(MPI_COMM_WORLD, &Globals::nranks)) { - std::cout << "### FATAL ERROR in main" << std::endl - << "MPI_Comm_size failed." 
<< std::endl; - // MPI_Finalize(); - return ParthenonStatus::error; - } + PARTHENON_MPI_CHECK(MPI_Comm_size(MPI_COMM_WORLD, &Globals::nranks)); #else // no MPI Globals::my_rank = 0; Globals::nranks = 1; @@ -232,7 +224,9 @@ ParthenonStatus ParthenonManager::ParthenonFinalize() { pmesh.reset(); Kokkos::finalize(); #ifdef MPI_PARALLEL - MPI_Finalize(); + int mpi_finalized; + PARTHENON_MPI_CHECK(MPI_Finalized(&mpi_finalized)); + if (!mpi_finalized) PARTHENON_MPI_CHECK(MPI_Finalize()); #endif return ParthenonStatus::complete; } From cbdc096ab172179affbabeb4453477c6e2d98977 Mon Sep 17 00:00:00 2001 From: Jonah Miller Date: Tue, 10 Sep 2024 14:38:37 -0600 Subject: [PATCH 18/37] changelog --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 62b1ea9c5c78..274d3a7eb43d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,7 +3,7 @@ ## Current develop ### Changed (changing behavior/API/variables/...) - +- [[PR1172]](https://github.com/parthenon-hpc-lab/parthenon/pull/1172) Make parthenon manager robust against external MPI init and finalize calls ### Fixed (not changing behavior/API/variables/...) 
From 7438eb20b2ce527edb3c3fbc83e3007629a2993e Mon Sep 17 00:00:00 2001 From: Jonah Miller Date: Wed, 11 Sep 2024 17:49:05 -0600 Subject: [PATCH 19/37] add hach checking for params --- src/outputs/output_utils.cpp | 40 +++++++++++++-- src/outputs/output_utils.hpp | 4 ++ src/outputs/parthenon_hdf5.cpp | 3 ++ src/parameter_input.hpp | 44 +++++++++++++++++ src/utils/hash.hpp | 11 +++-- tst/unit/CMakeLists.txt | 2 +- ...d_desired.cpp => test_parameter_input.cpp} | 49 +++++++++++++++++++ 7 files changed, 145 insertions(+), 8 deletions(-) rename tst/unit/{test_required_desired.cpp => test_parameter_input.cpp} (71%) diff --git a/src/outputs/output_utils.cpp b/src/outputs/output_utils.cpp index 8b9d8478cfde..4c3c46fee0b6 100644 --- a/src/outputs/output_utils.cpp +++ b/src/outputs/output_utils.cpp @@ -30,6 +30,7 @@ #include "mesh/mesh_refinement.hpp" #include "mesh/meshblock.hpp" #include "outputs/output_utils.hpp" +#include "parameter_input.hpp" namespace parthenon { namespace OutputUtils { @@ -306,21 +307,52 @@ std::size_t MPIPrefixSum(std::size_t local, std::size_t &tot_count) { #endif // MPI_PARALLEL return out; } -std::size_t MPISum(std::size_t val) { +constexpr void CheckMPISizeT() { #ifdef MPI_PARALLEL - // Need to use sizeof here because unsigned long long and unsigned - // long are identical under the hood but registered as different - // types static_assert(std::is_integral::value && !std::is_signed::value, "size_t is unsigned and integral"); static_assert(sizeof(std::size_t) == sizeof(unsigned long long int), "MPI_UNSIGNED_LONG_LONG same as size_t"); + +#endif +} +std::size_t MPISum(std::size_t val) { +#ifdef MPI_PARALLEL + // Need to use sizeof here because unsigned long long and unsigned + // long are identical under the hood but registered as different + // types + CheckMPISizeT(); PARTHENON_MPI_CHECK(MPI_Allreduce(MPI_IN_PLACE, &val, 1, MPI_UNSIGNED_LONG_LONG, MPI_SUM, MPI_COMM_WORLD)); #endif return val; } +void CheckParameterInputConsistent(ParameterInput 
*pin) { +#ifdef MPI_PARALLEL + CheckMPISizeT(); + + std::size_t pin_hash = std::hash()(*pin); + std::size_t pin_hash_root = pin_hash; + PARTHENON_MPI_CHECK( + MPI_Bcast(&pin_hash_root, 1, MPI_UNSIGNED_LONG_LONG, 0, MPI_COMM_WORLD)); + + int is_same_local = (pin_hash == pin_hash_root); + int pinput_same_accross_ranks; + PARTHENON_MPI_CHECK(MPI_Reduce(&is_same_local, &pinput_same_accross_ranks, 1, MPI_INT, + MPI_LAND, 0, MPI_COMM_WORLD)); + if (Globals::my_rank == 0) { + PARTHENON_REQUIRE_THROWS( + pinput_same_accross_ranks, + "Parameter input object must be the same on every rank, otherwise I/O may " + "be\n\t\t" + "unable to write it safely. If you reached this error message, look to make " + "sure\n\t\t" + "that your calls to functions that look like pin->GetOrAdd are all called\n\t\t" + "exactly the same way on every MPI rank."); + } +#endif // MPI_PARALLEL +} } // namespace OutputUtils } // namespace parthenon diff --git a/src/outputs/output_utils.hpp b/src/outputs/output_utils.hpp index cc65dc63f7a2..5f95fbbaedc2 100644 --- a/src/outputs/output_utils.hpp +++ b/src/outputs/output_utils.hpp @@ -43,6 +43,9 @@ #include "utils/error_checking.hpp" namespace parthenon { +// forward declaration +class ParameterInput; + namespace OutputUtils { // Helper struct containing some information about a variable struct VarInfo { @@ -348,6 +351,7 @@ std::vector ComputeDerefinementCount(Mesh *pm); std::size_t MPIPrefixSum(std::size_t local, std::size_t &tot_count); std::size_t MPISum(std::size_t local); +void CheckParameterInputConsistent(ParameterInput *pin); } // namespace OutputUtils } // namespace parthenon diff --git a/src/outputs/parthenon_hdf5.cpp b/src/outputs/parthenon_hdf5.cpp index 7872e2ad4d03..6cb8c9a84955 100644 --- a/src/outputs/parthenon_hdf5.cpp +++ b/src/outputs/parthenon_hdf5.cpp @@ -72,6 +72,9 @@ void PHDF5Output::WriteOutputFileImpl(Mesh *pm, ParameterInput *pin, SimTime *tm Kokkos::Profiling::pushRegion("PHDF5::WriteOutputFileRealPrec"); } + // Check that 
the parameter input is safe to write to HDF5 + OutputUtils::CheckParameterInputConsistent(pin); + // writes all graphics variables to hdf file // HDF5 structures // Also writes companion xdmf file diff --git a/src/parameter_input.hpp b/src/parameter_input.hpp index 31c45dee050d..5379f5587dc4 100644 --- a/src/parameter_input.hpp +++ b/src/parameter_input.hpp @@ -31,6 +31,7 @@ #include "config.hpp" #include "defs.hpp" #include "outputs/io_wrapper.hpp" +#include "utils/hash.hpp" #include "utils/string_utils.hpp" namespace parthenon { @@ -74,6 +75,8 @@ class InputBlock { // Functions are implemented in parameter_input.cpp class ParameterInput { + friend class std::hash; + public: // constructor/destructor ParameterInput(); @@ -213,4 +216,45 @@ class ParameterInput { } }; } // namespace parthenon + +// JMM: Believe it or not, this is the recommended way to overload hash functions +// See: https://en.cppreference.com/w/cpp/utility/hash +namespace std { +template <> +struct hash { + std::size_t operator()(const parthenon::InputLine &il) { + return parthenon::impl::hash_combine(0, il.param_name, il.param_value, + il.param_comment); + } +}; + +template <> +struct hash { + std::size_t operator()(const parthenon::InputBlock &ib) { + using parthenon::impl::hash_combine; + std::size_t out = + hash_combine(0, ib.block_name, ib.max_len_parname, ib.max_len_parvalue); + for (parthenon::InputLine *pline = ib.pline; pline != nullptr; pline = pline->pnext) { + out = hash_combine(out, *pline); + } + return out; + } +}; + +template <> +struct hash { + std::size_t operator()(const parthenon::ParameterInput &in) { + using parthenon::InputBlock; + using parthenon::impl::hash_combine; + std::size_t out = 0; + out = hash_combine(out, in.last_filename_); + for (InputBlock *pblock = in.pfirst_block; pblock != nullptr; + pblock = pblock->pnext) { + out = hash_combine(out, *pblock); + } + return out; + } +}; +} // namespace std + #endif // PARAMETER_INPUT_HPP_ diff --git a/src/utils/hash.hpp 
b/src/utils/hash.hpp index 2f6592e3baa4..be28f197b6d3 100644 --- a/src/utils/hash.hpp +++ b/src/utils/hash.hpp @@ -20,15 +20,20 @@ #include #include +#include namespace parthenon { namespace impl { -template -std::size_t hash_combine(std::size_t lhs, const T &v) { +template +std::size_t hash_combine(std::size_t lhs, const T &v, Rest &&...rest) { std::size_t rhs = std::hash()(v); // The boost hash combine function lhs ^= rhs + 0x9e3779b9 + (lhs << 6) + (lhs >> 2); - return lhs; + if constexpr (sizeof...(Rest) > 0) { + return hash_combine(lhs, std::forward(rest)...); + } else { + return lhs; + } } template ::value - 1> diff --git a/tst/unit/CMakeLists.txt b/tst/unit/CMakeLists.txt index b44e6a7986da..c892572bdde9 100644 --- a/tst/unit/CMakeLists.txt +++ b/tst/unit/CMakeLists.txt @@ -36,7 +36,7 @@ list(APPEND unit_tests_SOURCES test_pararrays.cpp test_sparse_pack.cpp test_swarm.cpp - test_required_desired.cpp + test_parameter_input.cpp test_error_checking.cpp test_partitioning.cpp test_state_descriptor.cpp diff --git a/tst/unit/test_required_desired.cpp b/tst/unit/test_parameter_input.cpp similarity index 71% rename from tst/unit/test_required_desired.cpp rename to tst/unit/test_parameter_input.cpp index 407ecb4b390a..38a2e1b9d901 100644 --- a/tst/unit/test_required_desired.cpp +++ b/tst/unit/test_parameter_input.cpp @@ -101,3 +101,52 @@ TEST_CASE("Test required/desired checking from inputs", "[ParameterInput]") { } } } + +TEST_CASE("Parameter inputs can be hashed and hashing provides useful sanity checks", + "[ParameterInput][Hash]") { + GIVEN("Two ParameterInput objects already populated") { + ParameterInput in1, in2; + std::hash hasher; + std::stringstream ss; + ss << "" << std::endl + << "var1 = 0 # comment" << std::endl + << "var2 = 1, & # another comment" << std::endl + << " 2" << std::endl + << "" << std::endl + << "var3 = 3" << std::endl + << "# comment" << std::endl + << "var4 = 4" << std::endl; + + // JMM: streams are stateful. Need to be very careful here. 
+ std::string ideck = ss.str(); + std::istringstream s1(ideck); + std::istringstream s2(ideck); + in1.LoadFromStream(s1); + in2.LoadFromStream(s2); + + WHEN("We hash these parameter inputs") { + std::size_t hash1 = hasher(in1); + std::size_t hash2 = hasher(in2); + THEN("The hashes agree") { REQUIRE(hash1 == hash2); } + + AND_WHEN("We modify both parameter inputs in the same way") { + in1.GetOrAddReal("block3", "var5", 2.0); + in2.GetOrAddReal("block3", "var5", 2.0); + THEN("The hashes agree") { + std::size_t hash1 = hasher(in1); + std::size_t hash2 = hasher(in2); + REQUIRE(hash1 == hash2); + + AND_WHEN("When we modify one input but not the other") { + in2.GetOrAddInteger("block3", "var6", 7); + THEN("The hashes will not agree") { + std::size_t hash1 = hasher(in1); + std::size_t hash2 = hasher(in2); + REQUIRE(hash1 != hash2); + } + } + } + } + } + } +} From 0a89ba42e97b7d72d18dd719c9ea7e4131ffe242 Mon Sep 17 00:00:00 2001 From: Jonah Miller Date: Wed, 11 Sep 2024 17:54:44 -0600 Subject: [PATCH 20/37] changelog --- CHANGELOG.md | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c37f63c6d2d3..ad620f69d70a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,16 +5,14 @@ ### Added (new features/APIs/variables/...) - [[PR 1171]](https://github.com/parthenon-hpc-lab/parthenon/pull/1171) Add PARTHENON_USE_SYSTEM_PACKAGES build option - ### Changed (changing behavior/API/variables/...) - [[PR1172]](https://github.com/parthenon-hpc-lab/parthenon/pull/1172) Make parthenon manager robust against external MPI init and finalize calls ### Fixed (not changing behavior/API/variables/...) - +- [[PR1173]](https://github.com/parthenon-hpc-lab/parthenon/pull/1173) Make debugging easier by making parthenon throw an error if ParameterInput is different on multiple MPI ranks. ### Infrastructure (changes irrelevant to downstream codes) - ### Removed (removing behavior/API/varaibles/...) 
From a80e91e40fe26dcb879e39dcc184a5f64684bc57 Mon Sep 17 00:00:00 2001 From: Jonah Miller Date: Wed, 11 Sep 2024 17:55:36 -0600 Subject: [PATCH 21/37] CC --- src/parameter_input.hpp | 2 +- src/utils/hash.hpp | 2 +- tst/unit/test_parameter_input.cpp | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/parameter_input.hpp b/src/parameter_input.hpp index 5379f5587dc4..cb23c1c2cc9e 100644 --- a/src/parameter_input.hpp +++ b/src/parameter_input.hpp @@ -3,7 +3,7 @@ // Copyright(C) 2014 James M. Stone and other code contributors // Licensed under the 3-clause BSD License, see LICENSE file for details //======================================================================================== -// (C) (or copyright) 2020-2022. Triad National Security, LLC. All rights reserved. +// (C) (or copyright) 2020-2024. Triad National Security, LLC. All rights reserved. // // This program was produced under U.S. Government contract 89233218CNA000001 for Los // Alamos National Laboratory (LANL), which is operated by Triad National Security, LLC diff --git a/src/utils/hash.hpp b/src/utils/hash.hpp index be28f197b6d3..77642a64dba3 100644 --- a/src/utils/hash.hpp +++ b/src/utils/hash.hpp @@ -3,7 +3,7 @@ // Copyright(C) 2022 The Parthenon collaboration // Licensed under the 3-clause BSD License, see LICENSE file for details //======================================================================================== -// (C) (or copyright) 2022. Triad National Security, LLC. All rights reserved. +// (C) (or copyright) 2022-2024. Triad National Security, LLC. All rights reserved. // // This program was produced under U.S. 
Government contract 89233218CNA000001 for Los // Alamos National Laboratory (LANL), which is operated by Triad National Security, LLC diff --git a/tst/unit/test_parameter_input.cpp b/tst/unit/test_parameter_input.cpp index 38a2e1b9d901..b6f03008eb2e 100644 --- a/tst/unit/test_parameter_input.cpp +++ b/tst/unit/test_parameter_input.cpp @@ -3,7 +3,7 @@ // Copyright(C) 2014 James M. Stone and other code contributors // Licensed under the 3-clause BSD License, see LICENSE file for details //======================================================================================== -// (C) (or copyright) 2020-2021. Triad National Security, LLC. All rights reserved. +// (C) (or copyright) 2020-2024. Triad National Security, LLC. All rights reserved. // // This program was produced under U.S. Government contract 89233218CNA000001 for Los // Alamos National Laboratory (LANL), which is operated by Triad National Security, LLC From 94188156d8ceb9c71be833c300d03df4649231bd Mon Sep 17 00:00:00 2001 From: Jonah Miller Date: Wed, 11 Sep 2024 18:00:00 -0600 Subject: [PATCH 22/37] move comment --- src/outputs/output_utils.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/outputs/output_utils.cpp b/src/outputs/output_utils.cpp index 4c3c46fee0b6..9714291293c2 100644 --- a/src/outputs/output_utils.cpp +++ b/src/outputs/output_utils.cpp @@ -309,6 +309,9 @@ std::size_t MPIPrefixSum(std::size_t local, std::size_t &tot_count) { } constexpr void CheckMPISizeT() { #ifdef MPI_PARALLEL + // Need to use sizeof here because unsigned long long and unsigned + // long are identical under the hood but registered as different + // types static_assert(std::is_integral::value && !std::is_signed::value, "size_t is unsigned and integral"); @@ -319,9 +322,6 @@ constexpr void CheckMPISizeT() { } std::size_t MPISum(std::size_t val) { #ifdef MPI_PARALLEL - // Need to use sizeof here because unsigned long long and unsigned - // long are identical under the hood but registered as 
different - // types CheckMPISizeT(); PARTHENON_MPI_CHECK(MPI_Allreduce(MPI_IN_PLACE, &val, 1, MPI_UNSIGNED_LONG_LONG, MPI_SUM, MPI_COMM_WORLD)); From 693b437b4ed5dd293b9019eda9d168522c96660f Mon Sep 17 00:00:00 2001 From: Jonah Miller Date: Thu, 12 Sep 2024 09:30:18 -0600 Subject: [PATCH 23/37] remove extraneous reduce --- src/outputs/output_utils.cpp | 23 ++++++++--------------- 1 file changed, 8 insertions(+), 15 deletions(-) diff --git a/src/outputs/output_utils.cpp b/src/outputs/output_utils.cpp index 9714291293c2..9995869b8138 100644 --- a/src/outputs/output_utils.cpp +++ b/src/outputs/output_utils.cpp @@ -337,21 +337,14 @@ void CheckParameterInputConsistent(ParameterInput *pin) { std::size_t pin_hash_root = pin_hash; PARTHENON_MPI_CHECK( MPI_Bcast(&pin_hash_root, 1, MPI_UNSIGNED_LONG_LONG, 0, MPI_COMM_WORLD)); - - int is_same_local = (pin_hash == pin_hash_root); - int pinput_same_accross_ranks; - PARTHENON_MPI_CHECK(MPI_Reduce(&is_same_local, &pinput_same_accross_ranks, 1, MPI_INT, - MPI_LAND, 0, MPI_COMM_WORLD)); - if (Globals::my_rank == 0) { - PARTHENON_REQUIRE_THROWS( - pinput_same_accross_ranks, - "Parameter input object must be the same on every rank, otherwise I/O may " - "be\n\t\t" - "unable to write it safely. If you reached this error message, look to make " - "sure\n\t\t" - "that your calls to functions that look like pin->GetOrAdd are all called\n\t\t" - "exactly the same way on every MPI rank."); - } + PARTHENON_REQUIRE_THROWS( + pin_hash == pin_hash_root, + "Parameter input object must be the same on every rank, otherwise I/O may " + "be\n\t\t" + "unable to write it safely. 
If you reached this error message, look to make " + "sure\n\t\t" + "that your calls to functions that look like pin->GetOrAdd are all called\n\t\t" + "exactly the same way on every MPI rank."); #endif // MPI_PARALLEL } } // namespace OutputUtils From 3aee4349df79caa5955462a9b86c82c4c382f265 Mon Sep 17 00:00:00 2001 From: Jonah Miller Date: Thu, 12 Sep 2024 10:01:53 -0600 Subject: [PATCH 24/37] use forward declarations to simplify includes in boundary conditions hpp --- src/bvals/boundary_conditions.hpp | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/src/bvals/boundary_conditions.hpp b/src/bvals/boundary_conditions.hpp index 9718ebcd7a58..19a8f7309715 100644 --- a/src/bvals/boundary_conditions.hpp +++ b/src/bvals/boundary_conditions.hpp @@ -19,14 +19,17 @@ #include #include "basic_types.hpp" -#include "interface/meshblock_data.hpp" -#include "interface/swarm_container.hpp" -#include "mesh/domain.hpp" namespace parthenon { -// Physical boundary conditions +// Forward declarations +template< typename T> +class MeshBlockData; +template +class MeshData; +class Swarm; +// Physical boundary conditions using BValFunc = std::function> &, bool)>; using SBValFunc = std::function &)>; From f6d63778ce45615cb09135e706b2b5c7560cf367 Mon Sep 17 00:00:00 2001 From: Jonah Miller Date: Thu, 12 Sep 2024 16:07:17 -0600 Subject: [PATCH 25/37] move a bunch of stuff to impl --- src/CMakeLists.txt | 2 + src/bvals/boundary_conditions.hpp | 4 +- src/interface/data_collection.hpp | 2 +- src/interface/meshblock_data.cpp | 62 +++++++++ src/interface/meshblock_data.hpp | 59 +------- src/interface/metadata.cpp | 22 +++ src/interface/metadata.hpp | 22 +-- src/interface/sparse_pool.hpp | 2 + src/interface/state_descriptor.cpp | 105 +++++++++++++- src/interface/state_descriptor.hpp | 129 ++++------------- src/interface/swarm_container.cpp | 18 +++ src/interface/swarm_container.hpp | 18 +-- src/interface/swarm_pack_base.hpp | 1 + src/mesh/forest/forest.cpp | 34 +++++ 
src/mesh/forest/forest.hpp | 33 +---- src/mesh/mesh.cpp | 2 + src/mesh/mesh.hpp | 3 +- src/mesh/meshblock_pack.hpp | 5 +- src/outputs/parthenon_hdf5.cpp | 5 + src/outputs/parthenon_hdf5.hpp | 7 +- src/parthenon_manager.cpp | 1 + src/tasks/tasks.cpp | 215 +++++++++++++++++++++++++++++ src/tasks/tasks.hpp | 197 ++------------------------ src/utils/bit_hacks.hpp | 14 +- src/utils/cell_center_offsets.cpp | 66 +++++++++ src/utils/cell_center_offsets.hpp | 45 +----- tst/style/cpplint.py | 1 - tst/unit/test_state_descriptor.cpp | 1 + 28 files changed, 591 insertions(+), 484 deletions(-) create mode 100644 src/tasks/tasks.cpp create mode 100644 src/utils/cell_center_offsets.cpp diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 7f66585a8e8c..133279563126 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -230,6 +230,7 @@ add_library(parthenon solvers/mg_solver.hpp solvers/solver_utils.hpp + tasks/tasks.cpp tasks/tasks.hpp tasks/thread_pool.hpp @@ -244,6 +245,7 @@ add_library(parthenon utils/bit_hacks.hpp utils/buffer_utils.cpp utils/buffer_utils.hpp + utils/cell_center_offsets.cpp utils/cell_center_offsets.hpp utils/change_rundir.cpp utils/communication_buffer.hpp diff --git a/src/bvals/boundary_conditions.hpp b/src/bvals/boundary_conditions.hpp index 19a8f7309715..020c20bcb720 100644 --- a/src/bvals/boundary_conditions.hpp +++ b/src/bvals/boundary_conditions.hpp @@ -23,9 +23,9 @@ namespace parthenon { // Forward declarations -template< typename T> +template class MeshBlockData; -template +template class MeshData; class Swarm; diff --git a/src/interface/data_collection.hpp b/src/interface/data_collection.hpp index c980f293a154..043bf878f8b8 100644 --- a/src/interface/data_collection.hpp +++ b/src/interface/data_collection.hpp @@ -1,5 +1,5 @@ //======================================================================================== -// (C) (or copyright) 2020-2023. Triad National Security, LLC. All rights reserved. +// (C) (or copyright) 2020-2024. 
Triad National Security, LLC. All rights reserved. // // This program was produced under U.S. Government contract 89233218CNA000001 for Los // Alamos National Laboratory (LANL), which is operated by Triad National Security, LLC diff --git a/src/interface/meshblock_data.cpp b/src/interface/meshblock_data.cpp index 9a927062f8aa..fcc2407eb574 100644 --- a/src/interface/meshblock_data.cpp +++ b/src/interface/meshblock_data.cpp @@ -51,6 +51,68 @@ void MeshBlockData::AddField(const std::string &base_name, const Metadata &me } } +template +void MeshBlockData::Add(std::shared_ptr> var) noexcept { + if (varUidMap_.count(var->GetUniqueID())) { + PARTHENON_THROW("Tried to add variable " + var->label() + " twice!"); + } + varVector_.push_back(var); + varMap_[var->label()] = var; + varUidMap_[var->GetUniqueID()] = var; + for (const auto &flag : var->metadata().Flags()) { + flagsToVars_[flag].insert(var); + } +} + +template +bool MeshBlockData::operator==(const MeshBlockData &cmp) { + // do some kind of check of equality + // do the two containers contain the same named fields? 
+ std::vector my_keys; + std::vector cmp_keys; + for (auto &v : varMap_) { + my_keys.push_back(v.first); + } + for (auto &v : cmp.GetVariableMap()) { + cmp_keys.push_back(v.first); + } + return (my_keys == cmp_keys); +} + +template +std::shared_ptr> MeshBlockData::AllocateSparse(std::string const &label, + bool flag_uninitialized) { + if (!HasVariable(label)) { + PARTHENON_THROW("Tried to allocate sparse variable '" + label + + "', but no such sparse variable exists"); + } + + auto var = GetVarPtr(label); + PARTHENON_REQUIRE_THROWS(var->IsSparse(), + "Tried to allocate non-sparse variable " + label); + + var->Allocate(pmy_block, flag_uninitialized); + + return var; +} + +template +void MeshBlockData::DeallocateSparse(std::string const &label) { + PARTHENON_REQUIRE_THROWS(HasVariable(label), + "Tried to deallocate sparse variable '" + label + + "', but no such sparse variable exists"); + + auto var = GetVarPtr(label); + // PARTHENON_REQUIRE_THROWS(var->IsSparse(), + // "Tried to deallocate non-sparse variable " + label); + + if (var->IsAllocated()) { + std::int64_t bytes = var->Deallocate(); + auto pmb = GetBlockPointer(); + pmb->LogMemUsage(-bytes); + } +} + /// Queries related to variable packs /// This is a helper function that queries the cache for the given pack. /// The strings are the keys and the lists are the values. diff --git a/src/interface/meshblock_data.hpp b/src/interface/meshblock_data.hpp index c7133317bd76..625294ba8b14 100644 --- a/src/interface/meshblock_data.hpp +++ b/src/interface/meshblock_data.hpp @@ -506,19 +506,7 @@ class MeshBlockData { // return number of stored arrays int Size() noexcept { return varVector_.size(); } - bool operator==(const MeshBlockData &cmp) { - // do some kind of check of equality - // do the two containers contain the same named fields? 
- std::vector my_keys; - std::vector cmp_keys; - for (auto &v : varMap_) { - my_keys.push_back(v.first); - } - for (auto &v : cmp.GetVariableMap()) { - cmp_keys.push_back(v.first); - } - return (my_keys == cmp_keys); - } + bool operator==(const MeshBlockData &cmp); bool Contains(const std::string &name) const noexcept { return varMap_.count(name); } bool Contains(const Uid_t &uid) const noexcept { return varUidMap_.count(uid); } @@ -553,54 +541,15 @@ class MeshBlockData { void AddField(const std::string &base_name, const Metadata &metadata, int sparse_id = InvalidSparseID); - void Add(std::shared_ptr> var) noexcept { - if (varUidMap_.count(var->GetUniqueID())) { - PARTHENON_THROW("Tried to add variable " + var->label() + " twice!"); - } - varVector_.push_back(var); - varMap_[var->label()] = var; - varUidMap_[var->GetUniqueID()] = var; - for (const auto &flag : var->metadata().Flags()) { - flagsToVars_[flag].insert(var); - } - } + void Add(std::shared_ptr> var) noexcept; std::shared_ptr> AllocateSparse(std::string const &label, - bool flag_uninitialized = false) { - if (!HasVariable(label)) { - PARTHENON_THROW("Tried to allocate sparse variable '" + label + - "', but no such sparse variable exists"); - } - - auto var = GetVarPtr(label); - PARTHENON_REQUIRE_THROWS(var->IsSparse(), - "Tried to allocate non-sparse variable " + label); - - var->Allocate(pmy_block, flag_uninitialized); - - return var; - } - + bool flag_uninitialized = false); std::shared_ptr> AllocSparseID(std::string const &base_name, const int sparse_id) { return AllocateSparse(MakeVarLabel(base_name, sparse_id)); } - - void DeallocateSparse(std::string const &label) { - PARTHENON_REQUIRE_THROWS(HasVariable(label), - "Tried to deallocate sparse variable '" + label + - "', but no such sparse variable exists"); - - auto var = GetVarPtr(label); - // PARTHENON_REQUIRE_THROWS(var->IsSparse(), - // "Tried to deallocate non-sparse variable " + label); - - if (var->IsAllocated()) { - std::int64_t bytes = 
var->Deallocate(); - auto pmb = GetBlockPointer(); - pmb->LogMemUsage(-bytes); - } - } + void DeallocateSparse(std::string const &label); std::weak_ptr pmy_block; std::shared_ptr resolved_packages; diff --git a/src/interface/metadata.cpp b/src/interface/metadata.cpp index fce88334c652..89b1898fb2dc 100644 --- a/src/interface/metadata.cpp +++ b/src/interface/metadata.cpp @@ -279,6 +279,28 @@ std::vector Metadata::Flags() const { return set_flags; } +bool Metadata::HasSameFlags(const Metadata &b) const { + auto const &a = *this; + + // Check extra bits are unset + auto const min_bits = std::min(a.bits_.size(), b.bits_.size()); + auto const &longer = a.bits_.size() > b.bits_.size() ? a.bits_ : b.bits_; + for (auto i = min_bits; i < longer.size(); i++) { + if (longer[i]) { + // Bits are default false, so if any bit in the extraneous portion of the longer + // bit list is set, then it cannot be equal to a. + return false; + } + } + + for (size_t i = 0; i < min_bits; i++) { + if (a.bits_[i] != b.bits_[i]) { + return false; + } + } + return true; +} + std::array Metadata::GetArrayDims(std::weak_ptr wpmb, bool coarse) const { std::array arrDims; diff --git a/src/interface/metadata.hpp b/src/interface/metadata.hpp index 5770323c21ba..5dadfecbfe7f 100644 --- a/src/interface/metadata.hpp +++ b/src/interface/metadata.hpp @@ -539,27 +539,7 @@ class Metadata { } // Operators - bool HasSameFlags(const Metadata &b) const { - auto const &a = *this; - - // Check extra bits are unset - auto const min_bits = std::min(a.bits_.size(), b.bits_.size()); - auto const &longer = a.bits_.size() > b.bits_.size() ? a.bits_ : b.bits_; - for (auto i = min_bits; i < longer.size(); i++) { - if (longer[i]) { - // Bits are default false, so if any bit in the extraneous portion of the longer - // bit list is set, then it cannot be equal to a. 
- return false; - } - } - - for (size_t i = 0; i < min_bits; i++) { - if (a.bits_[i] != b.bits_[i]) { - return false; - } - } - return true; - } + bool HasSameFlags(const Metadata &b) const; bool operator==(const Metadata &b) const { return HasSameFlags(b) && (shape_ == b.shape_); diff --git a/src/interface/sparse_pool.hpp b/src/interface/sparse_pool.hpp index 0b029e8c2eb7..1c3323da0673 100644 --- a/src/interface/sparse_pool.hpp +++ b/src/interface/sparse_pool.hpp @@ -24,6 +24,8 @@ #include "variable.hpp" namespace parthenon { +class Metadata; +class MetadataFlag; class SparsePool { public: diff --git a/src/interface/state_descriptor.cpp b/src/interface/state_descriptor.cpp index af73259ff6aa..31dea193c5aa 100644 --- a/src/interface/state_descriptor.cpp +++ b/src/interface/state_descriptor.cpp @@ -22,11 +22,33 @@ #include "basic_types.hpp" #include "interface/metadata.hpp" +#include "interface/packages.hpp" #include "interface/state_descriptor.hpp" +#include "interface/swarm.hpp" +#include "interface/variable.hpp" #include "utils/error_checking.hpp" namespace parthenon { +void RefinementFunctionMaps::Register(const Metadata &m, std::string varname) { + if (m.HasRefinementOps()) { + const auto &funcs = m.GetRefinementFunctions(); + // Guard against uninitialized refinement functions by checking + // if the label is the empty string. + if (funcs.label().size() == 0) { + std::stringstream ss; + ss << "Variable " << varname << " registed for refinement, " + << "but no prolongation/restriction options found!" + << "Please register them with Metadata::RegisterRefinementOps." 
<< std::endl; + PARTHENON_THROW(ss); + } + bool in_map = (funcs_to_ids.count(funcs) > 0); + if (!in_map) { + funcs_to_ids[funcs] = next_refinement_id_++; + } + } +} + void Packages_t::Add(const std::shared_ptr &package) { const auto &name = package->label(); PARTHENON_REQUIRE_THROWS(packages_.count(name) == 0, @@ -262,8 +284,21 @@ bool StateDescriptor::AddSwarmValue(const std::string &value_name, return true; } -bool StateDescriptor::AddFieldImpl(const VarID &vid, const Metadata &m_in, - const VarID &control_vid) { +bool StateDescriptor::AddField(const std::string &field_name, const Metadata &m_in, + const std::string &controlling_field) { + Metadata m = m_in; // so we can modify it + if (m.IsSet(Metadata::Sparse)) { + PARTHENON_THROW( + "Tried to add a sparse field with AddField, use AddSparsePool instead"); + } + if (!m.IsSet(GetMetadataFlag())) m.Set(GetMetadataFlag()); + VarID controller = VarID(controlling_field); + if (controlling_field == "") controller = VarID(field_name); + return AddFieldImpl_(VarID(field_name), m, controller); +} + +bool StateDescriptor::AddFieldImpl_(const VarID &vid, const Metadata &m_in, + const VarID &control_vid) { Metadata m = m_in; // Force const correctness const std::string &assoc = m.getAssociated(); @@ -294,7 +329,7 @@ bool StateDescriptor::AddFieldImpl(const VarID &vid, const Metadata &m_in, } auto fId = VarID{internal_fluxname + internal_varname_seperator + vid.base_name, vid.sparse_id}; - AddFieldImpl(fId, mf, control_vid); + AddFieldImpl_(fId, mf, control_vid); m.SetFluxName(fId.label()); } metadataMap_.insert({vid, m}); @@ -308,7 +343,7 @@ bool StateDescriptor::AddFieldImpl(const VarID &vid, const Metadata &m_in, return true; } -bool StateDescriptor::AddSparsePoolImpl(const SparsePool &pool) { +bool StateDescriptor::AddSparsePoolImpl_(const SparsePool &pool) { if (pool.pool().size() == 0) { return false; } @@ -325,8 +360,8 @@ bool StateDescriptor::AddSparsePoolImpl(const SparsePool &pool) { if (controller_base == "") 
controller_base = pool.base_name(); // add all the sparse fields for (const auto itr : pool.pool()) { - if (!AddFieldImpl(VarID(pool.base_name(), itr.first), itr.second, - VarID(controller_base, itr.first))) { + if (!AddFieldImpl_(VarID(pool.base_name(), itr.first), itr.second, + VarID(controller_base, itr.first))) { // a field with this name already exists, this would leave the StateDescriptor in an // inconsistent state, so throw PARTHENON_THROW("Couldn't add sparse field " + @@ -337,6 +372,24 @@ bool StateDescriptor::AddSparsePoolImpl(const SparsePool &pool) { return true; } +std::vector StateDescriptor::Fields() noexcept { + std::vector names; + names.reserve(metadataMap_.size()); + for (auto &x : metadataMap_) { + names.push_back(x.first.label()); + } + return names; +} + +std::vector StateDescriptor::Swarms() noexcept { + std::vector names; + names.reserve(swarmMetadataMap_.size()); + for (auto &x : swarmMetadataMap_) { + names.push_back(x.first); + } + return names; +} + bool StateDescriptor::FlagsPresent(std::vector const &flags, bool matchAny) { for (auto &pair : metadataMap_) @@ -350,6 +403,46 @@ bool StateDescriptor::FlagsPresent(std::vector const &flags, return false; } +std::string StateDescriptor::GetFieldController(const std::string &field_name) { + VarID field_id(field_name); + auto controller = allocControllerReverseMap_.find(field_id); + PARTHENON_REQUIRE(controller != allocControllerReverseMap_.end(), + "Asking for controlling field that is not in this package (" + + field_name + ")"); + return controller->second.label(); +} + +bool StateDescriptor::SwarmValuePresent(const std::string &value_name, + const std::string &swarm_name) const noexcept { + if (!SwarmPresent(swarm_name)) return false; + return swarmValueMetadataMap_.at(swarm_name).count(value_name) > 0; +} + +const std::vector & +StateDescriptor::GetControlledVariables(const std::string &field_name) { + auto iter = allocControllerMap_.find(field_name); + if (iter == 
allocControllerMap_.end()) return nullControl_; + return iter->second; +} + +std::vector StateDescriptor::GetControlVariables() { + std::vector vars; + for (auto &pair : allocControllerMap_) { + vars.push_back(pair.first); + } + return vars; +} + +// retrieve metadata for a specific field +const Metadata &StateDescriptor::FieldMetadata(const std::string &base_name, + int sparse_id) const { + const auto itr = metadataMap_.find(VarID(base_name, sparse_id)); + PARTHENON_REQUIRE_THROWS(itr != metadataMap_.end(), + "FieldMetadata: Non-existent field: " + + MakeVarLabel(base_name, sparse_id)); + return itr->second; +} + std::ostream &operator<<(std::ostream &os, const StateDescriptor &sd) { os << "# Package: " << sd.label() << "\n" << "# ---------------------------------------------------\n" diff --git a/src/interface/state_descriptor.hpp b/src/interface/state_descriptor.hpp index c488d53a1bcd..106c3ea6e525 100644 --- a/src/interface/state_descriptor.hpp +++ b/src/interface/state_descriptor.hpp @@ -24,15 +24,12 @@ #include #include -#include "amr_criteria/amr_criteria.hpp" #include "basic_types.hpp" +#include "bvals/boundary_conditions.hpp" #include "interface/metadata.hpp" -#include "interface/packages.hpp" #include "interface/params.hpp" #include "interface/sparse_pool.hpp" -#include "interface/swarm.hpp" #include "interface/var_id.hpp" -#include "interface/variable.hpp" #include "outputs/output_parameters.hpp" #include "prolong_restrict/prolong_restrict.hpp" #include "utils/error_checking.hpp" @@ -44,9 +41,8 @@ template class MeshBlockData; template class MeshData; - -using BValFunc = std::function> &, bool)>; -using SBValFunc = std::function &)>; +class AMRCriteria; +class Packages_t; /// A little container class owning refinement function properties /// needed for the state descriptor. @@ -61,25 +57,7 @@ using SBValFunc = std::function &)>; /// TODO(JMM): The IDs here are not the same as the variable unique /// IDs but they maybe could be? 
We should consider unifying that. struct RefinementFunctionMaps { - void Register(const Metadata &m, std::string varname) { - if (m.HasRefinementOps()) { - const auto &funcs = m.GetRefinementFunctions(); - // Guard against uninitialized refinement functions by checking - // if the label is the empty string. - if (funcs.label().size() == 0) { - std::stringstream ss; - ss << "Variable " << varname << " registed for refinement, " - << "but no prolongation/restriction options found!" - << "Please register them with Metadata::RegisterRefinementOps." << std::endl; - PARTHENON_THROW(ss); - } - bool in_map = (funcs_to_ids.count(funcs) > 0); - if (!in_map) { - funcs_to_ids[funcs] = next_refinement_id_++; - } - } - } - + void Register(const Metadata &m, std::string varname); std::size_t size() const noexcept { return next_refinement_id_; } // A unique enumeration of refinement functions starting from zero. // This is used for caching which prolongation/restriction operator @@ -191,27 +169,8 @@ class StateDescriptor { } // field addition / retrieval routines - private: - // internal function to add dense/sparse fields. 
Private because outside classes must - // use the public interface below - bool AddFieldImpl(const VarID &vid, const Metadata &m, const VarID &control_vid); - - // add a sparse pool - bool AddSparsePoolImpl(const SparsePool &pool); - - public: bool AddField(const std::string &field_name, const Metadata &m_in, - const std::string &controlling_field = "") { - Metadata m = m_in; // so we can modify it - if (m.IsSet(Metadata::Sparse)) { - PARTHENON_THROW( - "Tried to add a sparse field with AddField, use AddSparsePool instead"); - } - if (!m.IsSet(GetMetadataFlag())) m.Set(GetMetadataFlag()); - VarID controller = VarID(controlling_field); - if (controlling_field == "") controller = VarID(field_name); - return AddFieldImpl(VarID(field_name), m, controller); - } + const std::string &controlling_field = ""); template bool AddField(const Metadata &m, const std::string &controlling_field = "") { return AddField(T::name(), m, controlling_field); @@ -222,13 +181,13 @@ class StateDescriptor { // SparsePool constructors template bool AddSparsePool(Args &&...args) { - return AddSparsePoolImpl(SparsePool(std::forward(args)...)); + return AddSparsePoolImpl_(SparsePool(std::forward(args)...)); } template bool AddSparsePool(const std::string &base_name, const Metadata &m_in, Args &&...args) { Metadata m = m_in; // so we can modify it if (!m.IsSet(GetMetadataFlag())) m.Set(GetMetadataFlag()); - return AddSparsePoolImpl(SparsePool(base_name, m, std::forward(args)...)); + return AddSparsePoolImpl_(SparsePool(base_name, m, std::forward(args)...)); } template bool AddSparsePool(const Metadata &m_in, Args &&...args) { @@ -239,24 +198,10 @@ class StateDescriptor { int size() const noexcept { return metadataMap_.size(); } // retrieve all field names - std::vector Fields() noexcept { - std::vector names; - names.reserve(metadataMap_.size()); - for (auto &x : metadataMap_) { - names.push_back(x.first.label()); - } - return names; - } + std::vector Fields() noexcept; // retrieve all swarm 
names - std::vector Swarms() noexcept { - std::vector names; - names.reserve(swarmMetadataMap_.size()); - for (auto &x : swarmMetadataMap_) { - names.push_back(x.first); - } - return names; - } + std::vector Swarms() noexcept; const auto &AllFields() const noexcept { return metadataMap_; } const auto &AllSparsePools() const noexcept { return sparsePoolMap_; } @@ -297,6 +242,7 @@ class StateDescriptor { const auto &RefinementFncsToIDs() const noexcept { return refinementFuncMaps_.funcs_to_ids; } + bool FieldPresent(const std::string &base_name, int sparse_id = InvalidSparseID) const noexcept { return metadataMap_.count(VarID(base_name, sparse_id)) > 0; @@ -311,46 +257,20 @@ class StateDescriptor { return swarmMetadataMap_.count(swarm_name) > 0; } bool SwarmValuePresent(const std::string &value_name, - const std::string &swarm_name) const noexcept { - if (!SwarmPresent(swarm_name)) return false; - return swarmValueMetadataMap_.at(swarm_name).count(value_name) > 0; - } - - std::string GetFieldController(const std::string &field_name) { - VarID field_id(field_name); - auto controller = allocControllerReverseMap_.find(field_id); - PARTHENON_REQUIRE(controller != allocControllerReverseMap_.end(), - "Asking for controlling field that is not in this package (" + - field_name + ")"); - return controller->second.label(); - } + const std::string &swarm_name) const noexcept; + std::string GetFieldController(const std::string &field_name); bool ControlVariablesSet() { return (allocControllerMap_.size() > 0); } - - const std::vector &GetControlledVariables(const std::string &field_name) { - auto iter = allocControllerMap_.find(field_name); - if (iter == allocControllerMap_.end()) return nullControl_; - return iter->second; - } - - std::vector GetControlVariables() { - std::vector vars; - for (auto &pair : allocControllerMap_) { - vars.push_back(pair.first); - } - return vars; - } + const std::vector &GetControlledVariables(const std::string &field_name); + std::vector 
GetControlVariables(); // retrieve metadata for a specific field const Metadata &FieldMetadata(const std::string &base_name, - int sparse_id = InvalidSparseID) const { - const auto itr = metadataMap_.find(VarID(base_name, sparse_id)); - PARTHENON_REQUIRE_THROWS(itr != metadataMap_.end(), - "FieldMetadata: Non-existent field: " + - MakeVarLabel(base_name, sparse_id)); - return itr->second; + int sparse_id = InvalidSparseID) const; + // retrieve metadata for a specific swarm + Metadata &SwarmMetadata(const std::string &swarm_name) noexcept { + return swarmMetadataMap_[swarm_name]; } - const auto &GetSparsePool(const std::string &base_name) const noexcept { const auto itr = sparsePoolMap_.find(base_name); PARTHENON_REQUIRE_THROWS(itr != sparsePoolMap_.end(), @@ -358,12 +278,6 @@ class StateDescriptor { return itr->second; } - // retrieve metadata for a specific swarm - Metadata &SwarmMetadata(const std::string &swarm_name) noexcept { - // TODO(JL) Do we want to add a default metadata for a non-existent swarm_name? - return swarmMetadataMap_[swarm_name]; - } - bool FlagsPresent(std::vector const &flags, bool matchAny = false); void PreCommFillDerived(MeshBlockData *rc) const { @@ -470,6 +384,13 @@ class StateDescriptor { std::array, BOUNDARY_NFACES> UserSwarmBoundaryFunctions; protected: + // internal function to add dense/sparse fields. 
Protected because outside classes must + // use the public interface above + bool AddFieldImpl_(const VarID &vid, const Metadata &m, const VarID &control_vid); + + // add a sparse pool + bool AddSparsePoolImpl_(const SparsePool &pool); + void InvertControllerMap(); Params params_; diff --git a/src/interface/swarm_container.cpp b/src/interface/swarm_container.cpp index 9fc26a213e03..6e62ee0e72b6 100644 --- a/src/interface/swarm_container.cpp +++ b/src/interface/swarm_container.cpp @@ -214,4 +214,22 @@ void SwarmContainer::Print() const { } } +bool SwarmContainer::operator==(const SwarmContainer &cmp) { + // Test that labels of swarms are the same + std::vector my_keys(swarmMap_.size()); + auto &cmpMap = cmp.GetSwarmMap(); + std::vector cmp_keys(cmpMap.size()); + size_t i = 0; + for (auto &s : swarmMap_) { + my_keys[i] = s.first; + i++; + } + i = 0; + for (auto &s : cmpMap) { + cmp_keys[i] = s.first; + i++; + } + return my_keys == cmp_keys; +} + } // namespace parthenon diff --git a/src/interface/swarm_container.hpp b/src/interface/swarm_container.hpp index 1a1486b2a44c..b69900d2480f 100644 --- a/src/interface/swarm_container.hpp +++ b/src/interface/swarm_container.hpp @@ -163,23 +163,7 @@ class SwarmContainer { TaskStatus FinalizeCommunicationIterative(); [[deprecated("Not yet implemented")]] void ClearBoundary(BoundaryCommSubset phase); - bool operator==(const SwarmContainer &cmp) { - // Test that labels of swarms are the same - std::vector my_keys(swarmMap_.size()); - auto &cmpMap = cmp.GetSwarmMap(); - std::vector cmp_keys(cmpMap.size()); - size_t i = 0; - for (auto &s : swarmMap_) { - my_keys[i] = s.first; - i++; - } - i = 0; - for (auto &s : cmpMap) { - cmp_keys[i] = s.first; - i++; - } - return my_keys == cmp_keys; - } + bool operator==(const SwarmContainer &cmp); private: void UpdateMetadataMap_(std::shared_ptr swarm) { diff --git a/src/interface/swarm_pack_base.hpp b/src/interface/swarm_pack_base.hpp index b954aa6d2762..0733aa51f329 100644 --- 
a/src/interface/swarm_pack_base.hpp +++ b/src/interface/swarm_pack_base.hpp @@ -26,6 +26,7 @@ #include "interface/pack_utils.hpp" #include "interface/state_descriptor.hpp" +#include "interface/swarm_device_context.hpp" #include "interface/variable.hpp" #include "utils/utils.hpp" diff --git a/src/mesh/forest/forest.cpp b/src/mesh/forest/forest.cpp index 6afad77259e3..4e5ab68c2a35 100644 --- a/src/mesh/forest/forest.cpp +++ b/src/mesh/forest/forest.cpp @@ -24,6 +24,7 @@ #include #include +#include "application_input.hpp" #include "basic_types.hpp" #include "defs.hpp" #include "mesh/forest/forest.hpp" @@ -295,5 +296,38 @@ Forest Forest::Make2D(ForestDefinition &forest_def) { return fout; } +// TODO(LFR): Probably eventually remove this. This is only meaningful for simply +// oriented grids +LogicalLocation Forest::GetLegacyTreeLocation(const LogicalLocation &loc) const { + if (loc.tree() < 0) + return loc; // This is already presumed to be an Athena++ tree location + auto parent_loc = trees.at(loc.tree())->athena_forest_loc; + int composite_level = parent_loc.level() + loc.level(); + int lx1 = (parent_loc.lx1() << loc.level()) + loc.lx1(); + int lx2 = (parent_loc.lx2() << loc.level()) + loc.lx2(); + int lx3 = (parent_loc.lx3() << loc.level()) + loc.lx3(); + return LogicalLocation(composite_level, lx1, lx2, lx3); +} + +LogicalLocation +Forest::GetForestLocationFromLegacyTreeLocation(const LogicalLocation &loc) const { + if (loc.tree() >= 0) + return loc; // This location is already associated with a tree in the Parthenon + // forest + int macro_level = (*trees.begin()).second->athena_forest_loc.level(); + auto forest_loc = loc.GetParent(loc.level() - macro_level); + for (auto &[id, t] : trees) { + if (t->athena_forest_loc == forest_loc) { + return LogicalLocation( + t->GetId(), loc.level() - macro_level, + loc.lx1() - (forest_loc.lx1() << (loc.level() - macro_level)), + loc.lx2() - (forest_loc.lx2() << (loc.level() - macro_level)), + loc.lx3() - (forest_loc.lx3() << 
(loc.level() - macro_level))); + } + } + PARTHENON_FAIL("Somehow didn't find a tree."); + return LogicalLocation(); +} + } // namespace forest } // namespace parthenon diff --git a/src/mesh/forest/forest.hpp b/src/mesh/forest/forest.hpp index 773d9057136c..cf5f2780cbc1 100644 --- a/src/mesh/forest/forest.hpp +++ b/src/mesh/forest/forest.hpp @@ -23,7 +23,6 @@ #include #include -#include "application_input.hpp" #include "basic_types.hpp" #include "defs.hpp" #include "mesh/forest/forest_topology.hpp" @@ -33,6 +32,8 @@ #include "utils/indexer.hpp" namespace parthenon { +class ApplicationInput; + namespace forest { template @@ -161,36 +162,10 @@ class Forest { // TODO(LFR): Probably eventually remove this. This is only meaningful for simply // oriented grids - LogicalLocation GetLegacyTreeLocation(const LogicalLocation &loc) const { - if (loc.tree() < 0) - return loc; // This is already presumed to be an Athena++ tree location - auto parent_loc = trees.at(loc.tree())->athena_forest_loc; - int composite_level = parent_loc.level() + loc.level(); - int lx1 = (parent_loc.lx1() << loc.level()) + loc.lx1(); - int lx2 = (parent_loc.lx2() << loc.level()) + loc.lx2(); - int lx3 = (parent_loc.lx3() << loc.level()) + loc.lx3(); - return LogicalLocation(composite_level, lx1, lx2, lx3); - } + LogicalLocation GetLegacyTreeLocation(const LogicalLocation &loc) const; LogicalLocation - GetForestLocationFromLegacyTreeLocation(const LogicalLocation &loc) const { - if (loc.tree() >= 0) - return loc; // This location is already associated with a tree in the Parthenon - // forest - int macro_level = (*trees.begin()).second->athena_forest_loc.level(); - auto forest_loc = loc.GetParent(loc.level() - macro_level); - for (auto &[id, t] : trees) { - if (t->athena_forest_loc == forest_loc) { - return LogicalLocation( - t->GetId(), loc.level() - macro_level, - loc.lx1() - (forest_loc.lx1() << (loc.level() - macro_level)), - loc.lx2() - (forest_loc.lx2() << (loc.level() - macro_level)), - loc.lx3() 
- (forest_loc.lx3() << (loc.level() - macro_level))); - } - } - PARTHENON_FAIL("Somehow didn't find a tree."); - return LogicalLocation(); - } + GetForestLocationFromLegacyTreeLocation(const LogicalLocation &loc) const; std::size_t CountTrees() const { return trees.size(); } diff --git a/src/mesh/mesh.cpp b/src/mesh/mesh.cpp index 2672155e83dc..f88c6fc645e6 100644 --- a/src/mesh/mesh.cpp +++ b/src/mesh/mesh.cpp @@ -37,10 +37,12 @@ #include "bvals/comms/bvals_in_one.hpp" #include "parthenon_mpi.hpp" +#include "application_input.hpp" #include "bvals/boundary_conditions.hpp" #include "bvals/bvals.hpp" #include "defs.hpp" #include "globals.hpp" +#include "interface/packages.hpp" #include "interface/state_descriptor.hpp" #include "interface/update.hpp" #include "mesh/mesh.hpp" diff --git a/src/mesh/mesh.hpp b/src/mesh/mesh.hpp index ca221d3f6e56..684a897aad56 100644 --- a/src/mesh/mesh.hpp +++ b/src/mesh/mesh.hpp @@ -34,7 +34,6 @@ #include #include -#include "application_input.hpp" #include "bvals/boundary_conditions.hpp" #include "bvals/comms/tag_map.hpp" #include "config.hpp" @@ -59,8 +58,10 @@ namespace parthenon { // Forward declarations +class ApplicationInput; class MeshBlock; class MeshRefinement; +class Packages_t; class ParameterInput; class RestartReader; diff --git a/src/mesh/meshblock_pack.hpp b/src/mesh/meshblock_pack.hpp index edeca40ab734..5669e112109b 100644 --- a/src/mesh/meshblock_pack.hpp +++ b/src/mesh/meshblock_pack.hpp @@ -1,5 +1,5 @@ //======================================================================================== -// (C) (or copyright) 2020-2021. Triad National Security, LLC. All rights reserved. +// (C) (or copyright) 2020-2024. Triad National Security, LLC. All rights reserved. // // This program was produced under U.S. 
Government contract 89233218CNA000001 for Los // Alamos National Laboratory (LANL), which is operated by Triad National Security, LLC @@ -24,13 +24,10 @@ #include "coordinates/coordinates.hpp" #include "interface/variable_pack.hpp" #include "kokkos_abstraction.hpp" -#include "mesh/domain.hpp" -#include "mesh/meshblock.hpp" // TODO(JMM): Replace with forward declaration? namespace parthenon { class Mesh; -// class MeshBlock; // a separate dims array removes a branch case in `GetDim` // TODO(JMM): Using one IndexShape because its the same for all diff --git a/src/outputs/parthenon_hdf5.cpp b/src/outputs/parthenon_hdf5.cpp index 7872e2ad4d03..3f7c88835c69 100644 --- a/src/outputs/parthenon_hdf5.cpp +++ b/src/outputs/parthenon_hdf5.cpp @@ -741,6 +741,11 @@ void PHDF5Output::WriteSparseInfo_(Mesh *pm, hbool_t *sparse_allocated, // Utility functions implemented namespace HDF5 { +H5G MakeGroup(hid_t file, const std::string &name) { + return H5G::FromHIDCheck( + H5Gcreate(file, name.c_str(), H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT)); +} + hid_t GenerateFileAccessProps() { #ifdef MPI_PARALLEL /* set the file access template for parallel IO access */ diff --git a/src/outputs/parthenon_hdf5.hpp b/src/outputs/parthenon_hdf5.hpp index d51c50025875..72deaab90681 100644 --- a/src/outputs/parthenon_hdf5.hpp +++ b/src/outputs/parthenon_hdf5.hpp @@ -66,13 +66,8 @@ namespace parthenon { namespace HDF5 { -// Implemented in CPP file as it's complex hid_t GenerateFileAccessProps(); - -inline H5G MakeGroup(hid_t file, const std::string &name) { - return H5G::FromHIDCheck( - H5Gcreate(file, name.c_str(), H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT)); -} +H5G MakeGroup(hid_t file, const std::string &name); template void HDF5WriteND(hid_t location, const std::string &name, const T *data, int rank, diff --git a/src/parthenon_manager.cpp b/src/parthenon_manager.cpp index 028d9883b1ec..61396f42721c 100644 --- a/src/parthenon_manager.cpp +++ b/src/parthenon_manager.cpp @@ -26,6 +26,7 @@ #include 
+#include "amr_criteria/amr_criteria.hpp" #include "amr_criteria/refinement_package.hpp" #include "config.hpp" #include FS_HEADER diff --git a/src/tasks/tasks.cpp b/src/tasks/tasks.cpp new file mode 100644 index 000000000000..8d4cbd656d55 --- /dev/null +++ b/src/tasks/tasks.cpp @@ -0,0 +1,215 @@ +//======================================================================================== +// (C) (or copyright) 2023-2024. Triad National Security, LLC. All rights reserved. +// +// This program was produced under U.S. Government contract 89233218CNA000001 for Los +// Alamos National Laboratory (LANL), which is operated by Triad National Security, LLC +// for the U.S. Department of Energy/National Nuclear Security Administration. All rights +// in the program are reserved by Triad National Security, LLC, and the U.S. Department +// of Energy/National Nuclear Security Administration. The Government is granted for +// itself and others acting on its behalf a nonexclusive, paid-up, irrevocable worldwide +// license in this material to reproduce, prepare derivative works, distribute copies to +// the public, perform publicly and display publicly, and to permit others to do so. 
+//======================================================================================== + +#include +#include +#include +#include + +#if __has_include() +#include //NOLINT +#define HAS_CXX_ABI +#endif + +#include "tasks.hpp" +#include "thread_pool.hpp" +#include "utils/error_checking.hpp" + +namespace parthenon { +TaskID TaskID::operator|(const TaskID &other) const { + // calling this operator means you're building a TaskID to hold a dependency + TaskID result; + if (task != nullptr) + result.dep.push_back(task); + else + result.dep.insert(result.dep.end(), dep.begin(), dep.end()); + if (other.task != nullptr) + result.dep.push_back(other.task); + else + result.dep.insert(result.dep.end(), other.dep.begin(), other.dep.end()); + return result; +} + +TaskStatus Task::operator()() { + auto status = f(); + if (verbose_level_ > 0) + printf("%s [status = %i, rank = %i]\n", label_.c_str(), static_cast(status), + Globals::my_rank); + if (task_type == TaskType::completion) { + // keep track of how many times it's been called + num_calls += (status == TaskStatus::iterate || status == TaskStatus::complete); + // enforce minimum number of iterations + if (num_calls < exec_limits.first && status == TaskStatus::complete) + status = TaskStatus::iterate; + // enforce maximum number of iterations + if (num_calls == exec_limits.second) status = TaskStatus::complete; + } + // save the status in the Task object + SetStatus(status); + return status; +} + +bool Task::ready() { + // check that no dependency is incomplete + bool go = true; + for (auto &dep : dependencies) { + go = go && (dep->GetStatus() != TaskStatus::incomplete); + } + return go; +} + +inline std::ostream &WriteTaskGraph(std::ostream &stream, + const std::vector> &tasks) { +#ifndef HAS_CXX_ABI + std::cout << "Warning: task graph output will not include function" + "signatures since libcxxabi is unavailable.\n"; +#endif + std::vector> replacements; + replacements.emplace_back("parthenon::", ""); + 
replacements.emplace_back("std::", ""); + replacements.emplace_back("MeshData<[^>]*>", "MD"); + replacements.emplace_back("MeshBlockData<[^>]*>", "MBD"); + replacements.emplace_back("shared_ptr", "sptr"); + replacements.emplace_back("TaskStatus ", ""); + replacements.emplace_back("BoundaryType::", ""); + + stream << "digraph {\n"; + stream << "node [fontname=\"Helvetica,Arial,sans-serif\"]\n"; + stream << "edge [fontname=\"Helvetica,Arial,sans-serif\"]\n"; + constexpr int kBufSize = 1024; + char buf[kBufSize]; + for (auto &ptask : tasks) { + std::string cleaned_label = ptask->GetLabel(); + for (auto &[re, str] : replacements) + cleaned_label = std::regex_replace(cleaned_label, re, str); + snprintf(buf, kBufSize, " n%p [label=\"%s\"];\n", ptask->GetID().GetTask(), + cleaned_label.c_str()); + stream << std::string(buf); + } + for (auto &ptask : tasks) { + for (auto &pdtask : ptask->GetDependent(TaskStatus::complete)) { + snprintf(buf, kBufSize, " n%p -> n%p [style=\"solid\"];\n", + ptask->GetID().GetTask(), pdtask->GetID().GetTask()); + stream << std::string(buf); + } + } + for (auto &ptask : tasks) { + for (auto &pdtask : ptask->GetDependent(TaskStatus::iterate)) { + snprintf(buf, kBufSize, " n%p -> n%p [style=\"dashed\"];\n", + ptask->GetID().GetTask(), pdtask->GetID().GetTask()); + stream << std::string(buf); + } + } + stream << "}\n"; + return stream; +} + +TaskListStatus TaskRegion::Execute(ThreadPool &pool) { + // for now, require a pool with one thread + PARTHENON_REQUIRE_THROWS(pool.size() == 1, + "ThreadPool size != 1 is not currently supported.") + + // first, if needed, finish building the graph + if (!graph_built) BuildGraph(); + + // declare this so it can call itself + std::function ProcessTask; + ProcessTask = [&pool, &ProcessTask](Task *task) -> TaskStatus { + auto status = task->operator()(); + auto next_up = task->GetDependent(status); + for (auto t : next_up) { + if (t->ready()) { + pool.enqueue([t, &ProcessTask]() { return ProcessTask(t); }); + } 
+ } + return status; + }; + + // now enqueue the "first_task" for all task lists + for (auto &tl : task_lists) { + auto t = tl.GetStartupTask(); + pool.enqueue([t, &ProcessTask]() { return ProcessTask(t); }); + } + + // then wait until everything is done + pool.wait(); + + // Check the results, so as to fire any exceptions from threads + // Return failure if a task failed + return (pool.check_task_returns() == TaskStatus::complete) ? TaskListStatus::complete + : TaskListStatus::fail; +} + +void TaskRegion::AppendTasks(std::vector> &tasks_inout) { + BuildGraph(); + for (const auto &tl : task_lists) { + tl.AppendTasks(tasks_inout); + } +} + +void TaskRegion::AddRegionalDependencies(const std::vector &tls) { + const auto num_lists = tls.size(); + const auto num_regional = tls.front()->NumRegional(); + std::vector tasks(num_lists); + for (int i = 0; i < num_regional; i++) { + for (int j = 0; j < num_lists; j++) { + tasks[j] = tls[j]->Regional(i); + } + std::vector> reg_dep; + for (int j = 0; j < num_lists; j++) { + reg_dep.push_back(std::vector()); + for (auto t : tasks[j]->GetDependent(TaskStatus::complete)) { + reg_dep[j].push_back(t); + } + } + for (int j = 0; j < num_lists; j++) { + for (auto t : reg_dep[j]) { + for (int k = 0; k < num_lists; k++) { + if (j == k) continue; + t->AddDependency(tasks[k]); + tasks[k]->AddDependent(t, TaskStatus::complete); + } + } + } + } +} + +void TaskRegion::BuildGraph() { + // first handle regional dependencies by getting a vector of pointers + // to every sub-TaskList of each of the main TaskLists in the region + // (and also including a pointer to the main TaskLists). 
Match these + // TaskLists up across the region and insert their regional dependencies + std::vector> tls; + for (auto &tl : task_lists) + tls.emplace_back(tl.GetAllTaskLists()); + + int num_sublists = tls.front().size(); + std::vector matching_lists(task_lists.size()); + for (int sl = 0; sl < num_sublists; ++sl) { + for (int i = 0; i < task_lists.size(); ++i) + matching_lists[i] = tls[i][sl]; + AddRegionalDependencies(matching_lists); + } + + // now hook up iterations + for (auto &tl : task_lists) { + tl.ConnectIteration(); + } + + graph_built = true; + for (auto &tl : task_lists) { + tl.SetGraphBuilt(); + } +} + +} // namespace parthenon diff --git a/src/tasks/tasks.hpp b/src/tasks/tasks.hpp index d432f0a7ea21..a0090c391a83 100644 --- a/src/tasks/tasks.hpp +++ b/src/tasks/tasks.hpp @@ -13,18 +13,12 @@ #ifndef TASKS_TASKS_HPP_ #define TASKS_TASKS_HPP_ -#if __has_include() -#include //NOLINT -#define HAS_CXX_ABI -#endif - #include #include #include #include #include #include -#include #include #include #include @@ -84,19 +78,7 @@ class TaskID { // pointers to Task are implicitly convertible to TaskID TaskID(Task *t) : task(t) {} // NOLINT(runtime/explicit) - TaskID operator|(const TaskID &other) const { - // calling this operator means you're building a TaskID to hold a dependency - TaskID result; - if (task != nullptr) - result.dep.push_back(task); - else - result.dep.insert(result.dep.end(), dep.begin(), dep.end()); - if (other.task != nullptr) - result.dep.push_back(other.task); - else - result.dep.insert(result.dep.end(), other.dep.begin(), other.dep.end()); - return result; - } + TaskID operator|(const TaskID &other) const; const std::vector &GetIDs() const { return std::cref(dep); } @@ -130,34 +112,10 @@ class Task { dependent[static_cast(TaskStatus::incomplete)].push_back(this); } - TaskStatus operator()() { - auto status = f(); - if (verbose_level_ > 0) - printf("%s [status = %i, rank = %i]\n", label_.c_str(), static_cast(status), - Globals::my_rank); - if 
(task_type == TaskType::completion) { - // keep track of how many times it's been called - num_calls += (status == TaskStatus::iterate || status == TaskStatus::complete); - // enforce minimum number of iterations - if (num_calls < exec_limits.first && status == TaskStatus::complete) - status = TaskStatus::iterate; - // enforce maximum number of iterations - if (num_calls == exec_limits.second) status = TaskStatus::complete; - } - // save the status in the Task object - SetStatus(status); - return status; - } + TaskStatus operator()(); TaskID GetID() { return this; } std::string GetLabel() const { return label_; } - bool ready() { - // check that no dependency is incomplete - bool go = true; - for (auto &dep : dependencies) { - go = go && (dep->GetStatus() != TaskStatus::incomplete); - } - return go; - } + bool ready(); void AddDependency(Task *t) { dependencies.insert(t); } std::unordered_set &GetDependencies() { return dependencies; } void AddDependent(Task *t, TaskStatus status) { @@ -193,51 +151,8 @@ class Task { std::string label_; }; -inline std::ostream &WriteTaskGraph(std::ostream &stream, - const std::vector> &tasks) { -#ifndef HAS_CXX_ABI - std::cout << "Warning: task graph output will not include function" - "signatures since libcxxabi is unavailable.\n"; -#endif - std::vector> replacements; - replacements.emplace_back("parthenon::", ""); - replacements.emplace_back("std::", ""); - replacements.emplace_back("MeshData<[^>]*>", "MD"); - replacements.emplace_back("MeshBlockData<[^>]*>", "MBD"); - replacements.emplace_back("shared_ptr", "sptr"); - replacements.emplace_back("TaskStatus ", ""); - replacements.emplace_back("BoundaryType::", ""); - - stream << "digraph {\n"; - stream << "node [fontname=\"Helvetica,Arial,sans-serif\"]\n"; - stream << "edge [fontname=\"Helvetica,Arial,sans-serif\"]\n"; - constexpr int kBufSize = 1024; - char buf[kBufSize]; - for (auto &ptask : tasks) { - std::string cleaned_label = ptask->GetLabel(); - for (auto &[re, str] : 
replacements) - cleaned_label = std::regex_replace(cleaned_label, re, str); - snprintf(buf, kBufSize, " n%p [label=\"%s\"];\n", ptask->GetID().GetTask(), - cleaned_label.c_str()); - stream << std::string(buf); - } - for (auto &ptask : tasks) { - for (auto &pdtask : ptask->GetDependent(TaskStatus::complete)) { - snprintf(buf, kBufSize, " n%p -> n%p [style=\"solid\"];\n", - ptask->GetID().GetTask(), pdtask->GetID().GetTask()); - stream << std::string(buf); - } - } - for (auto &ptask : tasks) { - for (auto &pdtask : ptask->GetDependent(TaskStatus::iterate)) { - snprintf(buf, kBufSize, " n%p -> n%p [style=\"dashed\"];\n", - ptask->GetID().GetTask(), pdtask->GetID().GetTask()); - stream << std::string(buf); - } - } - stream << "}\n"; - return stream; -} +std::ostream &WriteTaskGraph(std::ostream &stream, + const std::vector> &tasks); class TaskRegion; class TaskList { @@ -551,44 +466,8 @@ class TaskRegion { task_lists[i].SetID(i); } - TaskListStatus Execute(ThreadPool &pool) { - // for now, require a pool with one thread - PARTHENON_REQUIRE_THROWS(pool.size() == 1, - "ThreadPool size != 1 is not currently supported.") - - // first, if needed, finish building the graph - if (!graph_built) BuildGraph(); - - // declare this so it can call itself - std::function ProcessTask; - ProcessTask = [&pool, &ProcessTask](Task *task) -> TaskStatus { - auto status = task->operator()(); - auto next_up = task->GetDependent(status); - for (auto t : next_up) { - if (t->ready()) { - pool.enqueue([t, &ProcessTask]() { return ProcessTask(t); }); - } - } - return status; - }; - - // now enqueue the "first_task" for all task lists - for (auto &tl : task_lists) { - auto t = tl.GetStartupTask(); - pool.enqueue([t, &ProcessTask]() { return ProcessTask(t); }); - } - - // then wait until everything is done - pool.wait(); - - // Check the results, so as to fire any exceptions from threads - // Return failure if a task failed - return (pool.check_task_returns() == TaskStatus::complete) ? 
TaskListStatus::complete - : TaskListStatus::fail; - } - + TaskListStatus Execute(ThreadPool &pool); TaskList &operator[](const int i) { return task_lists[i]; } - size_t size() const { return task_lists.size(); } inline friend std::ostream &operator<<(std::ostream &stream, TaskRegion &region) { @@ -601,67 +480,9 @@ class TaskRegion { std::vector task_lists; bool graph_built = false; - void AppendTasks(std::vector> &tasks_inout) { - BuildGraph(); - for (const auto &tl : task_lists) { - tl.AppendTasks(tasks_inout); - } - } - - void AddRegionalDependencies(const std::vector &tls) { - const auto num_lists = tls.size(); - const auto num_regional = tls.front()->NumRegional(); - std::vector tasks(num_lists); - for (int i = 0; i < num_regional; i++) { - for (int j = 0; j < num_lists; j++) { - tasks[j] = tls[j]->Regional(i); - } - std::vector> reg_dep; - for (int j = 0; j < num_lists; j++) { - reg_dep.push_back(std::vector()); - for (auto t : tasks[j]->GetDependent(TaskStatus::complete)) { - reg_dep[j].push_back(t); - } - } - for (int j = 0; j < num_lists; j++) { - for (auto t : reg_dep[j]) { - for (int k = 0; k < num_lists; k++) { - if (j == k) continue; - t->AddDependency(tasks[k]); - tasks[k]->AddDependent(t, TaskStatus::complete); - } - } - } - } - } - - void BuildGraph() { - // first handle regional dependencies by getting a vector of pointers - // to every sub-TaskList of each of the main TaskLists in the region - // (and also including a pointer to the main TaskLists).
Match these - // TaskLists up across the region and insert their regional dependencies - std::vector> tls; - for (auto &tl : task_lists) - tls.emplace_back(tl.GetAllTaskLists()); - - int num_sublists = tls.front().size(); - std::vector matching_lists(task_lists.size()); - for (int sl = 0; sl < num_sublists; ++sl) { - for (int i = 0; i < task_lists.size(); ++i) - matching_lists[i] = tls[i][sl]; - AddRegionalDependencies(matching_lists); - } - - // now hook up iterations - for (auto &tl : task_lists) { - tl.ConnectIteration(); - } - - graph_built = true; - for (auto &tl : task_lists) { - tl.SetGraphBuilt(); - } - } + void AppendTasks(std::vector> &tasks_inout); + void AddRegionalDependencies(const std::vector &tls); + void BuildGraph(); }; class TaskCollection { diff --git a/src/utils/bit_hacks.hpp b/src/utils/bit_hacks.hpp index 89a831b2e537..c9fa7c73c660 100644 --- a/src/utils/bit_hacks.hpp +++ b/src/utils/bit_hacks.hpp @@ -1,5 +1,5 @@ //======================================================================================== -// (C) (or copyright) 2020-2023. Triad National Security, LLC. All rights reserved. +// (C) (or copyright) 2020-2024. Triad National Security, LLC. All rights reserved. // // This program was produced under U.S. 
Government contract 89233218CNA000001 for Los // Alamos National Laboratory (LANL), which is operated by Triad National Security, LLC @@ -90,18 +90,18 @@ inline int NumberOfBinaryTrailingZeros(std::uint64_t val) { inline int MaximumPowerOf2Divisor(int in) { return in & (~(in - 1)); } -inline uint IntegerLog2Ceil(uint in) { - uint log2 = 0; - uint in_temp = in; +inline unsigned int IntegerLog2Ceil(unsigned int in) { + unsigned int log2 = 0; + unsigned int in_temp = in; while (in_temp >>= 1) { log2++; } - uint pow = 1U << log2; + unsigned int pow = 1U << log2; return log2 + (pow != in); } -inline uint IntegerLog2Floor(uint in) { - uint log2 = 0; +inline unsigned int IntegerLog2Floor(unsigned int in) { + unsigned int log2 = 0; while (in >>= 1) { log2++; } diff --git a/src/utils/cell_center_offsets.cpp b/src/utils/cell_center_offsets.cpp new file mode 100644 index 000000000000..f22fcd874561 --- /dev/null +++ b/src/utils/cell_center_offsets.cpp @@ -0,0 +1,66 @@ +//======================================================================================== +// (C) (or copyright) 2023-2024. Triad National Security, LLC. All rights reserved. +// +// This program was produced under U.S. Government contract 89233218CNA000001 for Los +// Alamos National Laboratory (LANL), which is operated by Triad National Security, LLC +// for the U.S. Department of Energy/National Nuclear Security Administration. All rights +// in the program are reserved by Triad National Security, LLC, and the U.S. Department +// of Energy/National Nuclear Security Administration. The Government is granted for +// itself and others acting on its behalf a nonexclusive, paid-up, irrevocable worldwide +// license in this material to reproduce, prepare derivative works, distribute copies to +// the public, perform publicly and display publicly, and to permit others to do so. 
+//======================================================================================== + +#include + +#include "utils/cell_center_offsets.hpp" +#include "utils/error_checking.hpp" + +namespace parthenon { + +BoundaryFace CellCentOffsets::Face() const { + if (!IsFace()) return BoundaryFace::undef; + for (int dir = 0; dir < 3; ++dir) { + if (static_cast(u[dir])) + return static_cast((1 + static_cast(u[dir])) / 2 + 2 * dir); + } + PARTHENON_FAIL("Shouldn't get here."); + return BoundaryFace::undef; +} + +std::vector CellCentOffsets::GetTangentDirections() const { + std::vector dirs; + CoordinateDirection missed; + for (auto dir : {X1DIR, X2DIR, X3DIR}) { + uint dir_idx = static_cast(dir); + if (!static_cast(u[dir_idx - 1])) { // This direction has no offset, so must be + // tangent direction + dirs.push_back(dir); + } else { + missed = dir; + } + } + if (dirs.size() == 2 && missed == X2DIR) { + dirs = {X3DIR, X1DIR}; // Make sure we are in cyclic order + } + return dirs; +} + +std::vector> CellCentOffsets::GetNormals() const { + std::vector> dirs; + CoordinateDirection missed; + for (auto dir : {X1DIR, X2DIR, X3DIR}) { + uint dir_idx = dir - 1; + if (static_cast(u[dir_idx])) { + dirs.push_back({dir, u[dir_idx]}); + } else { + missed = dir; + } + } + if (dirs.size() == 2 && missed == X2DIR) { + dirs = {dirs[1], dirs[0]}; // Make sure we are in cyclic order + } + return dirs; +} + +} // namespace parthenon diff --git a/src/utils/cell_center_offsets.hpp b/src/utils/cell_center_offsets.hpp index 1ef49d74855e..2bc41e323f7e 100644 --- a/src/utils/cell_center_offsets.hpp +++ b/src/utils/cell_center_offsets.hpp @@ -64,55 +64,16 @@ struct CellCentOffsets { return {static_cast(u[0]), static_cast(u[1]), static_cast(u[2])}; } - BoundaryFace Face() const { - if (!IsFace()) return BoundaryFace::undef; - for (int dir = 0; dir < 3; ++dir) { - if (static_cast(u[dir])) - return static_cast((1 + static_cast(u[dir])) / 2 + 2 * dir); - } - PARTHENON_FAIL("Shouldn't get here."); - 
return BoundaryFace::undef; - } + BoundaryFace Face() const; // Get the logical directions that are tangent to this element // (in cyclic order, XY, YZ, ZX, XYZ) - std::vector GetTangentDirections() const { - std::vector dirs; - CoordinateDirection missed; - for (auto dir : {X1DIR, X2DIR, X3DIR}) { - uint dir_idx = static_cast(dir); - if (!static_cast(u[dir_idx - 1])) { // This direction has no offset, so must be - // tangent direction - dirs.push_back(dir); - } else { - missed = dir; - } - } - if (dirs.size() == 2 && missed == X2DIR) { - dirs = {X3DIR, X1DIR}; // Make sure we are in cyclic order - } - return dirs; - } + std::vector GetTangentDirections() const; // Get the logical directions that are normal to this element // (in cyclic order, XY, YZ, ZX, XYZ) along with the offset of the // element in that direction from the cell center. - std::vector> GetNormals() const { - std::vector> dirs; - CoordinateDirection missed; - for (auto dir : {X1DIR, X2DIR, X3DIR}) { - uint dir_idx = dir - 1; - if (static_cast(u[dir_idx])) { - dirs.push_back({dir, u[dir_idx]}); - } else { - missed = dir; - } - } - if (dirs.size() == 2 && missed == X2DIR) { - dirs = {dirs[1], dirs[0]}; // Make sure we are in cyclic order - } - return dirs; - } + std::vector> GetNormals() const; bool IsNode() const { return 3 == abs(static_cast(u[0])) + abs(static_cast(u[1])) + diff --git a/tst/style/cpplint.py b/tst/style/cpplint.py index 49088eda0d5b..01599cb64a6a 100755 --- a/tst/style/cpplint.py +++ b/tst/style/cpplint.py @@ -49,7 +49,6 @@ import math # for log import os import re -import sre_compile import string import sys import sysconfig diff --git a/tst/unit/test_state_descriptor.cpp b/tst/unit/test_state_descriptor.cpp index 95890f9caf11..aa7ddb888995 100644 --- a/tst/unit/test_state_descriptor.cpp +++ b/tst/unit/test_state_descriptor.cpp @@ -25,6 +25,7 @@ #include "basic_types.hpp" #include "defs.hpp" #include "interface/metadata.hpp" +#include "interface/packages.hpp" #include 
"interface/sparse_pool.hpp" #include "interface/state_descriptor.hpp" #include "interface/variable.hpp" From 7d5231cbf7dc67c5aeae464ac44e52597cc6cbae Mon Sep 17 00:00:00 2001 From: Jonah Miller Date: Thu, 12 Sep 2024 16:10:28 -0600 Subject: [PATCH 26/37] CHANGELOG --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 69a5b4fc816a..510e3a6def57 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,7 +13,7 @@ ### Infrastructure (changes irrelevant to downstream codes) - +- [[PR1176]](https://github.com/parthenon-hpc-lab/parthenon/pull/1176) Move some code from header to implementation files ### Removed (removing behavior/API/varaibles/...) From 7fb2920f71ab846ad8df355bf2b845c8cbfc23e2 Mon Sep 17 00:00:00 2001 From: Jonah Miller Date: Thu, 12 Sep 2024 16:15:38 -0600 Subject: [PATCH 27/37] sre_compile now deprecated --- tst/style/cpplint.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tst/style/cpplint.py b/tst/style/cpplint.py index 01599cb64a6a..02412f77218f 100755 --- a/tst/style/cpplint.py +++ b/tst/style/cpplint.py @@ -837,7 +837,7 @@ def Match(pattern, s): # performance reasons; factoring it out into a separate function turns out # to be noticeably expensive. 
if pattern not in _regexp_compile_cache: - _regexp_compile_cache[pattern] = sre_compile.compile(pattern) + _regexp_compile_cache[pattern] = re.compile(pattern) return _regexp_compile_cache[pattern].match(s) @@ -855,14 +855,14 @@ def ReplaceAll(pattern, rep, s): string with replacements made (or original string if no replacements) """ if pattern not in _regexp_compile_cache: - _regexp_compile_cache[pattern] = sre_compile.compile(pattern) + _regexp_compile_cache[pattern] = re.compile(pattern) return _regexp_compile_cache[pattern].sub(rep, s) def Search(pattern, s): """Searches the string for the pattern, caching the compiled regexp.""" if pattern not in _regexp_compile_cache: - _regexp_compile_cache[pattern] = sre_compile.compile(pattern) + _regexp_compile_cache[pattern] = re.compile(pattern) return _regexp_compile_cache[pattern].search(s) From 454578060fa056aa4be2b79aea5fd0c27f8184de Mon Sep 17 00:00:00 2001 From: Ben Ryan Date: Thu, 12 Sep 2024 23:03:54 -0600 Subject: [PATCH 28/37] Store block gid and neighbor refinement levels in sparse packs (#1167) * No testing for now * CHANGELOG * Appears to work --- CHANGELOG.md | 1 + src/interface/sparse_pack.hpp | 13 +++++++++++++ src/interface/sparse_pack_base.cpp | 23 +++++++++++++++++++++++ src/interface/sparse_pack_base.hpp | 4 ++++ src/outputs/parthenon_xdmf.cpp | 1 - 5 files changed, 41 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 69a5b4fc816a..1a9e78069d7d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -25,6 +25,7 @@ Date: 2024-08-30 ### Added (new features/APIs/variables/...) 
+- [[PR 1167]](https://github.com/parthenon-hpc-lab/parthenon/pull/1167) Store block gid and neighbor refinement levels in sparse packs - [[PR 1151]](https://github.com/parthenon-hpc-lab/parthenon/pull/1151) Add time offset `c` to LowStorageIntegrator - [[PR 1147]](https://github.com/parthenon-hpc-lab/parthenon/pull/1147) Add `par_reduce_inner` functions - [[PR 1159]](https://github.com/parthenon-hpc-lab/parthenon/pull/1159) Add additional timestep controllers in parthenon/time. diff --git a/src/interface/sparse_pack.hpp b/src/interface/sparse_pack.hpp index e939e3dc7004..8b9803dd0b79 100644 --- a/src/interface/sparse_pack.hpp +++ b/src/interface/sparse_pack.hpp @@ -189,6 +189,19 @@ class SparsePack : public SparsePackBase { return bounds_h_(1, b, vidx); } + KOKKOS_INLINE_FUNCTION int GetLevel(const int b, const int off3, const int off2, + const int off1) const { + return block_props_(b, (off3 + 1) + 3 * ((off2 + 1) + 3 * (off1 + 1))); + } + + KOKKOS_INLINE_FUNCTION int GetGID(const int b) const { return block_props_(b, 27); } + + int GetLevelHost(const int b, const int off3, const int off2, const int off1) const { + return block_props_h_(b, (off3 + 1) + 3 * ((off2 + 1) + 3 * (off1 + 1))); + } + + int GetGIDHost(const int b) const { return block_props_h_(b, 27); } + // Number of components of a variable on a block template KOKKOS_INLINE_FUNCTION int GetSize(const int b, const T &t) const { diff --git a/src/interface/sparse_pack_base.cpp b/src/interface/sparse_pack_base.cpp index d4ead84113c8..1266f24e1a04 100644 --- a/src/interface/sparse_pack_base.cpp +++ b/src/interface/sparse_pack_base.cpp @@ -162,6 +162,10 @@ SparsePackBase SparsePackBase::Build(T *pmd, const PackDescriptor &desc, pack.bounds_ = bounds_t("bounds", 2, nblocks, nvar + 1); pack.bounds_h_ = Kokkos::create_mirror_view(pack.bounds_); + // This array stores refinement levels of current block and all neighboring blocks. 
+ pack.block_props_ = block_props_t("block_props", nblocks, 27 + 1); + pack.block_props_h_ = Kokkos::create_mirror_view(pack.block_props_); + pack.coords_ = coords_t("coords", desc.flat ? max_size : nblocks); auto coords_h = Kokkos::create_mirror_view(pack.coords_); @@ -180,6 +184,24 @@ SparsePackBase SparsePackBase::Build(T *pmd, const PackDescriptor &desc, coords_h(b) = pmbd->GetBlockPointer()->coords_device; } + // Initialize block refinement levels to current block level to provide default if + // neighbors not present + for (int n = 0; n < 27; n++) { + pack.block_props_h_(blidx, n) = + pmbd->GetBlockPointer()->loc.level(); + } + // This block's gid is stored in the last slot (index 27), after the 27 levels + pack.block_props_h_(blidx, 27) = pmbd->GetBlockPointer()->gid; + for (auto &neighbor : pmbd->GetBlockPointer()->neighbors) { + // Multiple refined neighbors may write to the same index but they will always have + // the same refinement level. + pack.block_props_h_( + blidx, (neighbor.offsets[2] + 1) + + 3 * ((neighbor.offsets[1] + 1) + 3 * (neighbor.offsets[0] + 1))) = + neighbor.loc.level(); + // Currently not storing neighbor gids + } + for (int i = 0; i < nvar; ++i) { pack.bounds_h_(0, blidx, i) = idx; for (const auto &[var_name, uid] : desc.var_groups[i]) { @@ -278,6 +300,7 @@ SparsePackBase SparsePackBase::Build(T *pmd, const PackDescriptor &desc, }); Kokkos::deep_copy(pack.pack_, pack.pack_h_); Kokkos::deep_copy(pack.bounds_, pack.bounds_h_); + Kokkos::deep_copy(pack.block_props_, pack.block_props_h_); Kokkos::deep_copy(pack.coords_, coords_h); return pack; diff --git a/src/interface/sparse_pack_base.hpp b/src/interface/sparse_pack_base.hpp index 4b01547d7b04..0deca487a20a 100644 --- a/src/interface/sparse_pack_base.hpp +++ b/src/interface/sparse_pack_base.hpp @@ -59,6 +59,8 @@ class SparsePackBase { using pack_h_t = typename pack_t::HostMirror; using bounds_t = ParArray3D; using bounds_h_t = typename bounds_t::HostMirror; + using block_props_t =
ParArray2D; + using block_props_h_t = typename block_props_t::HostMirror; using coords_t = ParArray1D>; // Returns a SparsePackBase object that is either newly created or taken @@ -90,6 +92,8 @@ class SparsePackBase { pack_h_t pack_h_; bounds_t bounds_; bounds_h_t bounds_h_; + block_props_t block_props_; + block_props_h_t block_props_h_; coords_t coords_; int flx_idx_; diff --git a/src/outputs/parthenon_xdmf.cpp b/src/outputs/parthenon_xdmf.cpp index e9a596a7e28d..a5bdc34542b0 100644 --- a/src/outputs/parthenon_xdmf.cpp +++ b/src/outputs/parthenon_xdmf.cpp @@ -211,7 +211,6 @@ void genXDMF(std::string hdfFile, Mesh *pm, SimTime *tm, IndexDomain domain, int if (swarm_xdmf && all_swarm_info.all_info.size() > 0) { std::string sfilename_aux = hdfFile + ".swarm.xdmf"; std::ofstream pxdmf; - hsize_t dims[H5_NDIM] = {0}; // zero-initialized // open file pxdmf = std::ofstream(sfilename_aux.c_str(), std::ofstream::trunc); From 3deb27117521d4afd098fdfe908622f9cef286be Mon Sep 17 00:00:00 2001 From: Jonah Miller Date: Fri, 13 Sep 2024 08:42:11 -0600 Subject: [PATCH 29/37] update linter --- src/mesh/mesh.cpp | 2 + src/outputs/parthenon_hdf5.cpp | 2 + src/outputs/parthenon_hdf5_attributes.cpp | 1 + src/outputs/restart_hdf5.cpp | 1 + tst/style/cpplint.py | 12803 ++++++++++---------- tst/unit/test_swarm.cpp | 2 + 6 files changed, 6102 insertions(+), 6709 deletions(-) diff --git a/src/mesh/mesh.cpp b/src/mesh/mesh.cpp index f88c6fc645e6..d513ec6e0b52 100644 --- a/src/mesh/mesh.cpp +++ b/src/mesh/mesh.cpp @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -30,6 +31,7 @@ #include #include #include +#include #include #include diff --git a/src/outputs/parthenon_hdf5.cpp b/src/outputs/parthenon_hdf5.cpp index 3f7c88835c69..eb4f27d3645f 100644 --- a/src/outputs/parthenon_hdf5.cpp +++ b/src/outputs/parthenon_hdf5.cpp @@ -28,9 +28,11 @@ #include #include #include +#include #include #include #include +#include #include "driver/driver.hpp" #include 
"interface/metadata.hpp" diff --git a/src/outputs/parthenon_hdf5_attributes.cpp b/src/outputs/parthenon_hdf5_attributes.cpp index f745dd30d0ba..45cece79881c 100644 --- a/src/outputs/parthenon_hdf5_attributes.cpp +++ b/src/outputs/parthenon_hdf5_attributes.cpp @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include diff --git a/src/outputs/restart_hdf5.cpp b/src/outputs/restart_hdf5.cpp index f16bfccc52de..c7584f954b8d 100644 --- a/src/outputs/restart_hdf5.cpp +++ b/src/outputs/restart_hdf5.cpp @@ -21,6 +21,7 @@ #include #include #include +#include #include "basic_types.hpp" #include "globals.hpp" diff --git a/tst/style/cpplint.py b/tst/style/cpplint.py index 02412f77218f..f513e8fbaec0 100755 --- a/tst/style/cpplint.py +++ b/tst/style/cpplint.py @@ -42,6 +42,7 @@ """ import codecs +import collections import copy import getopt import glob @@ -58,17 +59,10 @@ # if empty, use defaults _valid_extensions = set([]) -__VERSION__ = "1.4.4" - -try: - xrange # Python 2 -except NameError: - # -- pylint: disable=redefined-builtin - xrange = range # Python 3 - +__VERSION__ = '1.7' _USAGE = """ -Syntax: cpplint.py [--verbose=#] [--output=emacs|eclipse|vs7|junit] +Syntax: cpplint.py [--verbose=#] [--output=emacs|eclipse|vs7|junit|sed|gsed] [--filter=-x,+y,...] [--counting=total|toplevel|detailed] [--root=subdir] [--repository=path] @@ -76,6 +70,8 @@ [--recursive] [--exclude=path] [--extensions=hpp,cpp,...] + [--includeorder=default|standardcfirst] + [--config=filename] [--quiet] [--version] [file] ... @@ -90,9 +86,14 @@ certain of the problem, and 1 meaning it could be a legitimate construct. This will miss some errors, and is not a substitute for a code review. - To suppress false-positive errors of a certain category, add a - 'NOLINT(category)' comment to the line. NOLINT or NOLINT(*) - suppresses errors of all categories on that line. 
+ To suppress false-positive errors of certain categories, add a + 'NOLINT(category[, category...])' comment to the line. NOLINT or NOLINT(*) + suppresses errors of all categories on that line. To suppress categories + on the next line use NOLINTNEXTLINE instead of NOLINT. To suppress errors in + a block of code 'NOLINTBEGIN(category[, category...])' comment to a line at + the start of the block and to end the block add a comment with 'NOLINTEND'. + NOLINT blocks are inclusive so any statements on the same line as a BEGIN + or END will have the error suppression applied. The files passed in will be linted; at least one file must be provided. Default linted extensions are %s. @@ -101,11 +102,16 @@ Flags: - output=emacs|eclipse|vs7|junit + output=emacs|eclipse|vs7|junit|sed|gsed By default, the output is formatted to ease emacs parsing. Visual Studio compatible output (vs7) may also be used. Further support exists for eclipse (eclipse), and JUnit (junit). XML parsers such as those used - in Jenkins and Bamboo may also be used. Other formats are unsupported. + in Jenkins and Bamboo may also be used. + The sed format outputs sed commands that should fix some of the errors. + Note that this requires gnu sed. If that is installed as gsed on your + system (common e.g. on macOS with homebrew) you can use the gsed output + format. Sed commands are written to stdout, not stderr, so you should be + able to pipe output straight to a shell to run the fixes. verbose=# Specify a number 0-5 to restrict errors to certain verbosity levels. @@ -120,22 +126,30 @@ error messages whose category names pass the filters will be printed. (Category names are printed with the message and look like "[whitespace/indent]".) Filters are evaluated left to right. - "-FOO" and "FOO" means "do not print categories that start with FOO". + "-FOO" means "do not print categories that start with FOO". "+FOO" means "do print categories that start with FOO". 
Examples: --filter=-whitespace,+whitespace/braces - --filter=whitespace,runtime/printf,+runtime/printf_format + --filter=-whitespace,-runtime/printf,+runtime/printf_format --filter=-,+build/include_what_you_use To see a list of all the categories used in cpplint, pass no arg: --filter= + Filters can directly be limited to files and also line numbers. The + syntax is category:file:line , where line is optional. The filter limitation + works for both + and - and can be combined with ordinary filters: + + Examples: --filter=-whitespace:foo.h,+whitespace/braces:foo.h + --filter=-whitespace,-runtime/printf:foo.h:14,+runtime/printf_format:foo.h + --filter=-,+build/include_what_you_use:foo.h:321 + counting=total|toplevel|detailed The total number of errors found is always printed. If 'toplevel' is provided, then the count of errors in each of the top-level categories like 'build' and 'whitespace' will also be printed. If 'detailed' is provided, then a count - is provided for each category like 'build/class'. + is provided for each category like 'legal/copyright'. repository=path The top level directory of the repository, used to derive the header @@ -208,6 +222,18 @@ Examples: --extensions=%s + includeorder=default|standardcfirst + For the build/include_order rule, the default is to blindly assume angle + bracket includes with file extension are c-system-headers (default), + even knowing this will have false classifications. + The default is established at google. + standardcfirst means to instead use an allow-list of known c headers and + treat all others as separate group of "other system headers". The C headers + included are those of the C-standard lib and closely related ones. + + config=filename + Search for config files with the specified name instead of CPPLINT.cfg + headers=x,y,... The header extensions that cpplint will treat as .h in checks. Values are automatically added to --extensions list. 
@@ -267,267 +293,513 @@ # If you add a new error message with a new category, add it to the list # here! cpplint_unittest.py should tell you if you forget to do this. _ERROR_CATEGORIES = [ - "build/class", - "build/c++11", - "build/c++14", - "build/c++tr1", - "build/deprecated", - "build/endif_comment", - "build/explicit_make_pair", - "build/forward_decl", - "build/header_guard", - "build/include", - "build/include_subdir", - "build/include_alpha", - "build/include_order", - "build/include_what_you_use", - "build/namespaces_literals", - "build/namespaces", - "build/printf_format", - "build/storage_class", - "legal/copyright", - "readability/alt_tokens", - "readability/braces", - "readability/casting", - "readability/check", - "readability/constructors", - "readability/fn_size", - "readability/inheritance", - "readability/multiline_comment", - "readability/multiline_string", - "readability/namespace", - "readability/nolint", - "readability/nul", - "readability/strings", - "readability/todo", - "readability/utf8", - "runtime/arrays", - "runtime/casting", - "runtime/explicit", - "runtime/int", - "runtime/init", - "runtime/invalid_increment", - "runtime/member_string_references", - "runtime/memset", - "runtime/indentation_namespace", - "runtime/operator", - "runtime/printf", - "runtime/printf_format", - "runtime/references", - "runtime/string", - "runtime/threadsafe_fn", - "runtime/vlog", - "whitespace/blank_line", - "whitespace/braces", - "whitespace/comma", - "whitespace/comments", - "whitespace/empty_conditional_body", - "whitespace/empty_if_body", - "whitespace/empty_loop_body", - "whitespace/end_of_line", - "whitespace/ending_newline", - "whitespace/forcolon", - "whitespace/indent", - "whitespace/line_length", - "whitespace/newline", - "whitespace/operators", - "whitespace/parens", - "whitespace/semicolon", - "whitespace/tab", - "whitespace/todo", + 'build/c++11', + 'build/c++17', + 'build/deprecated', + 'build/endif_comment', + 'build/explicit_make_pair', + 
'build/forward_decl', + 'build/header_guard', + 'build/include', + 'build/include_subdir', + 'build/include_alpha', + 'build/include_order', + 'build/include_what_you_use', + 'build/namespaces_headers', + 'build/namespaces_literals', + 'build/namespaces', + 'build/printf_format', + 'build/storage_class', + 'legal/copyright', + 'readability/alt_tokens', + 'readability/braces', + 'readability/casting', + 'readability/check', + 'readability/constructors', + 'readability/fn_size', + 'readability/inheritance', + 'readability/multiline_comment', + 'readability/multiline_string', + 'readability/namespace', + 'readability/nolint', + 'readability/nul', + 'readability/strings', + 'readability/todo', + 'readability/utf8', + 'runtime/arrays', + 'runtime/casting', + 'runtime/explicit', + 'runtime/int', + 'runtime/init', + 'runtime/invalid_increment', + 'runtime/member_string_references', + 'runtime/memset', + 'runtime/operator', + 'runtime/printf', + 'runtime/printf_format', + 'runtime/references', + 'runtime/string', + 'runtime/threadsafe_fn', + 'runtime/vlog', + 'whitespace/blank_line', + 'whitespace/braces', + 'whitespace/comma', + 'whitespace/comments', + 'whitespace/empty_conditional_body', + 'whitespace/empty_if_body', + 'whitespace/empty_loop_body', + 'whitespace/end_of_line', + 'whitespace/ending_newline', + 'whitespace/forcolon', + 'whitespace/indent', + 'whitespace/indent_namespace', + 'whitespace/line_length', + 'whitespace/newline', + 'whitespace/operators', + 'whitespace/parens', + 'whitespace/semicolon', + 'whitespace/tab', + 'whitespace/todo', + ] + +# keywords to use with --outputs which generate stdout for machine processing +_MACHINE_OUTPUTS = [ + 'junit', + 'sed', + 'gsed' ] # These error categories are no longer enforced by cpplint, but for backwards- # compatibility they may still appear in NOLINT comments. 
_LEGACY_ERROR_CATEGORIES = [ - "readability/streams", - "readability/function", -] + 'build/class', + 'readability/streams', + 'readability/function', + ] + +# These prefixes for categories should be ignored since they relate to other +# tools which also use the NOLINT syntax, e.g. clang-tidy. +_OTHER_NOLINT_CATEGORY_PREFIXES = [ + 'clang-analyzer-', + 'abseil-', + 'altera-', + 'android-', + 'boost-', + 'bugprone-', + 'cert-', + 'concurrency-', + 'cppcoreguidelines-', + 'darwin-', + 'fuchsia-', + 'google-', + 'hicpp-', + 'linuxkernel-', + 'llvm-', + 'llvmlibc-', + 'misc-', + 'modernize-', + 'mpi-', + 'objc-', + 'openmp-', + 'performance-', + 'portability-', + 'readability-', + 'zircon-', + ] # The default state of the category filter. This is overridden by the --filter= # flag. By default all errors are on, so only add here categories that should be # off by default (i.e., categories that must be enabled by the --filter= flags). # All entries here should start with a '-' or '+', as in the --filter= flag. -_DEFAULT_FILTERS = ["-build/include_alpha"] +_DEFAULT_FILTERS = [ + '-build/include_alpha', + '-readability/fn_size', + ] # The default list of categories suppressed for C (not C++) files. _DEFAULT_C_SUPPRESSED_CATEGORIES = [ - "readability/casting", -] + 'readability/casting', + ] # The default list of categories suppressed for Linux Kernel files. _DEFAULT_KERNEL_SUPPRESSED_CATEGORIES = [ - "whitespace/tab", -] + 'whitespace/tab', + ] # We used to check for high-bit characters, but after much discussion we # decided those were OK, as long as they were in UTF-8 and didn't represent # hard-coded international strings, which belong in a separate i18n file. 
# C++ headers -_CPP_HEADERS = frozenset( - [ - # Legacy - "algobase.h", - "algo.h", - "alloc.h", - "builtinbuf.h", - "bvector.h", - "complex.h", - "defalloc.h", - "deque.h", - "editbuf.h", - "fstream.h", - "function.h", - "hash_map", - "hash_map.h", - "hash_set", - "hash_set.h", - "hashtable.h", - "heap.h", - "indstream.h", - "iomanip.h", - "iostream.h", - "istream.h", - "iterator.h", - "list.h", - "map.h", - "multimap.h", - "multiset.h", - "ostream.h", - "pair.h", - "parsestream.h", - "pfstream.h", - "procbuf.h", - "pthread_alloc", - "pthread_alloc.h", - "rope", - "rope.h", - "ropeimpl.h", - "set.h", - "slist", - "slist.h", - "stack.h", - "stdiostream.h", - "stl_alloc.h", - "stl_relops.h", - "streambuf.h", - "stream.h", - "strfile.h", - "strstream.h", - "tempbuf.h", - "tree.h", - "type_traits.h", - "vector.h", - # 17.6.1.2 C++ library headers - "algorithm", - "array", - "atomic", - "bitset", - "chrono", - "codecvt", - "complex", - "condition_variable", - "deque", - "exception", - "forward_list", - "fstream", - "functional", - "future", - "initializer_list", - "iomanip", - "ios", - "iosfwd", - "iostream", - "istream", - "iterator", - "limits", - "list", - "locale", - "map", - "memory", - "mutex", - "new", - "numeric", - "ostream", - "queue", - "random", - "ratio", - "regex", - "scoped_allocator", - "set", - "sstream", - "stack", - "stdexcept", - "streambuf", - "string", - "strstream", - "system_error", - "thread", - "tuple", - "typeindex", - "typeinfo", - "type_traits", - "unordered_map", - "unordered_set", - "utility", - "valarray", - "vector", - # 17.6.1.2 C++14 headers - "shared_mutex", - # 17.6.1.2 C++17 headers - "any", - "charconv", - "codecvt", - "execution", - "filesystem", - "memory_resource", - "optional", - "string_view", - "variant", - # 17.6.1.2 C++ headers for C library facilities - "cassert", - "ccomplex", - "cctype", - "cerrno", - "cfenv", - "cfloat", - "cinttypes", - "ciso646", - "climits", - "clocale", - "cmath", - "csetjmp", - "csignal", - 
"cstdalign", - "cstdarg", - "cstdbool", - "cstddef", - "cstdint", - "cstdio", - "cstdlib", - "cstring", - "ctgmath", - "ctime", - "cuchar", - "cwchar", - "cwctype", - ] -) +_CPP_HEADERS = frozenset([ + # Legacy + 'algobase.h', + 'algo.h', + 'alloc.h', + 'builtinbuf.h', + 'bvector.h', + # 'complex.h', collides with System C header "complex.h" since C11 + 'defalloc.h', + 'deque.h', + 'editbuf.h', + 'fstream.h', + 'function.h', + 'hash_map', + 'hash_map.h', + 'hash_set', + 'hash_set.h', + 'hashtable.h', + 'heap.h', + 'indstream.h', + 'iomanip.h', + 'iostream.h', + 'istream.h', + 'iterator.h', + 'list.h', + 'map.h', + 'multimap.h', + 'multiset.h', + 'ostream.h', + 'pair.h', + 'parsestream.h', + 'pfstream.h', + 'procbuf.h', + 'pthread_alloc', + 'pthread_alloc.h', + 'rope', + 'rope.h', + 'ropeimpl.h', + 'set.h', + 'slist', + 'slist.h', + 'stack.h', + 'stdiostream.h', + 'stl_alloc.h', + 'stl_relops.h', + 'streambuf.h', + 'stream.h', + 'strfile.h', + 'strstream.h', + 'tempbuf.h', + 'tree.h', + 'type_traits.h', + 'vector.h', + # C++ library headers + 'algorithm', + 'array', + 'atomic', + 'bitset', + 'chrono', + 'codecvt', + 'complex', + 'condition_variable', + 'deque', + 'exception', + 'forward_list', + 'fstream', + 'functional', + 'future', + 'initializer_list', + 'iomanip', + 'ios', + 'iosfwd', + 'iostream', + 'istream', + 'iterator', + 'limits', + 'list', + 'locale', + 'map', + 'memory', + 'mutex', + 'new', + 'numeric', + 'ostream', + 'queue', + 'random', + 'ratio', + 'regex', + 'scoped_allocator', + 'set', + 'sstream', + 'stack', + 'stdexcept', + 'streambuf', + 'string', + 'strstream', + 'system_error', + 'thread', + 'tuple', + 'typeindex', + 'typeinfo', + 'type_traits', + 'unordered_map', + 'unordered_set', + 'utility', + 'valarray', + 'vector', + # C++14 headers + 'shared_mutex', + # C++17 headers + 'any', + 'charconv', + 'codecvt', + 'execution', + 'filesystem', + 'memory_resource', + 'optional', + 'string_view', + 'variant', + # C++20 headers + 'barrier', + 'bit', + 
'compare', + 'concepts', + 'coroutine', + 'format', + 'latch', + 'numbers', + 'ranges', + 'semaphore', + 'source_location', + 'span', + 'stop_token', + 'syncstream', + 'version', + # C++23 headers + 'expected', + 'flat_map', + 'flat_set', + 'generator', + 'mdspan', + 'print', + 'spanstream', + 'stacktrace', + 'stdfloat', + # C++ headers for C library facilities + 'cassert', + 'ccomplex', + 'cctype', + 'cerrno', + 'cfenv', + 'cfloat', + 'cinttypes', + 'ciso646', + 'climits', + 'clocale', + 'cmath', + 'csetjmp', + 'csignal', + 'cstdalign', + 'cstdarg', + 'cstdbool', + 'cstddef', + 'cstdint', + 'cstdio', + 'cstdlib', + 'cstring', + 'ctgmath', + 'ctime', + 'cuchar', + 'cwchar', + 'cwctype', + ]) + +# C headers +_C_HEADERS = frozenset([ + # System C headers + 'assert.h', + 'complex.h', + 'ctype.h', + 'errno.h', + 'fenv.h', + 'float.h', + 'inttypes.h', + 'iso646.h', + 'limits.h', + 'locale.h', + 'math.h', + 'setjmp.h', + 'signal.h', + 'stdalign.h', + 'stdarg.h', + 'stdatomic.h', + 'stdbool.h', + 'stddef.h', + 'stdint.h', + 'stdio.h', + 'stdlib.h', + 'stdnoreturn.h', + 'string.h', + 'tgmath.h', + 'threads.h', + 'time.h', + 'uchar.h', + 'wchar.h', + 'wctype.h', + # C23 headers + 'stdbit.h', + 'stdckdint.h', + # additional POSIX C headers + 'aio.h', + 'arpa/inet.h', + 'cpio.h', + 'dirent.h', + 'dlfcn.h', + 'fcntl.h', + 'fmtmsg.h', + 'fnmatch.h', + 'ftw.h', + 'glob.h', + 'grp.h', + 'iconv.h', + 'langinfo.h', + 'libgen.h', + 'monetary.h', + 'mqueue.h', + 'ndbm.h', + 'net/if.h', + 'netdb.h', + 'netinet/in.h', + 'netinet/tcp.h', + 'nl_types.h', + 'poll.h', + 'pthread.h', + 'pwd.h', + 'regex.h', + 'sched.h', + 'search.h', + 'semaphore.h', + 'setjmp.h', + 'signal.h', + 'spawn.h', + 'strings.h', + 'stropts.h', + 'syslog.h', + 'tar.h', + 'termios.h', + 'trace.h', + 'ulimit.h', + 'unistd.h', + 'utime.h', + 'utmpx.h', + 'wordexp.h', + # additional GNUlib headers + 'a.out.h', + 'aliases.h', + 'alloca.h', + 'ar.h', + 'argp.h', + 'argz.h', + 'byteswap.h', + 'crypt.h', + 'endian.h', + 
'envz.h', + 'err.h', + 'error.h', + 'execinfo.h', + 'fpu_control.h', + 'fstab.h', + 'fts.h', + 'getopt.h', + 'gshadow.h', + 'ieee754.h', + 'ifaddrs.h', + 'libintl.h', + 'mcheck.h', + 'mntent.h', + 'obstack.h', + 'paths.h', + 'printf.h', + 'pty.h', + 'resolv.h', + 'shadow.h', + 'sysexits.h', + 'ttyent.h', + # Additional linux glibc headers + 'dlfcn.h', + 'elf.h', + 'features.h', + 'gconv.h', + 'gnu-versions.h', + 'lastlog.h', + 'libio.h', + 'link.h', + 'malloc.h', + 'memory.h', + 'netash/ash.h', + 'netatalk/at.h', + 'netax25/ax25.h', + 'neteconet/ec.h', + 'netipx/ipx.h', + 'netiucv/iucv.h', + 'netpacket/packet.h', + 'netrom/netrom.h', + 'netrose/rose.h', + 'nfs/nfs.h', + 'nl_types.h', + 'nss.h', + 're_comp.h', + 'regexp.h', + 'sched.h', + 'sgtty.h', + 'stab.h', + 'stdc-predef.h', + 'stdio_ext.h', + 'syscall.h', + 'termio.h', + 'thread_db.h', + 'ucontext.h', + 'ustat.h', + 'utmp.h', + 'values.h', + 'wait.h', + 'xlocale.h', + # Hardware specific headers + 'arm_neon.h', + 'emmintrin.h', + 'xmmintrin.h', + ]) + +# Folders of C libraries so commonly used in C++, +# that they have parity with standard C libraries. 
+C_STANDARD_HEADER_FOLDERS = frozenset([ + # standard C library + "sys", + # glibc for linux + "arpa", + "asm-generic", + "bits", + "gnu", + "net", + "netinet", + "protocols", + "rpc", + "rpcsvc", + "scsi", + # linux kernel header + "drm", + "linux", + "misc", + "mtd", + "rdma", + "sound", + "video", + "xen", + ]) # Type names _TYPES = re.compile( - r"^(?:" + r'^(?:' # [dcl.type.simple] - r"(char(16_t|32_t)?)|wchar_t|" - r"bool|short|int|long|signed|unsigned|float|double|" + r'(char(16_t|32_t)?)|wchar_t|' + r'bool|short|int|long|signed|unsigned|float|double|' # [support.types] - r"(ptrdiff_t|size_t|max_align_t|nullptr_t)|" + r'(ptrdiff_t|size_t|max_align_t|nullptr_t)|' # [cstdint.syn] - r"(u?int(_fast|_least)?(8|16|32|64)_t)|" - r"(u?int(max|ptr)_t)|" - r")$" -) + r'(u?int(_fast|_least)?(8|16|32|64)_t)|' + r'(u?int(max|ptr)_t)|' + r')$') # These headers are excluded from [build/include] and [build/include_order] @@ -536,53 +808,39 @@ # uppercase character, such as Python.h or nsStringAPI.h, for example). # - Lua headers. _THIRD_PARTY_HEADERS_PATTERN = re.compile( - r"^(?:[^/]*[A-Z][^/]*\.h|lua\.h|lauxlib\.h|lualib\.h)$" -) + r'^(?:[^/]*[A-Z][^/]*\.h|lua\.h|lauxlib\.h|lualib\.h)$') # Pattern for matching FileInfo.BaseName() against test file name -_test_suffixes = ["_test", "_regtest", "_unittest"] -_TEST_FILE_SUFFIX = "(" + "|".join(_test_suffixes) + r")$" +_test_suffixes = ['_test', '_regtest', '_unittest'] +_TEST_FILE_SUFFIX = '(' + '|'.join(_test_suffixes) + r')$' # Pattern that matches only complete whitespace, possibly across multiple lines. -_EMPTY_CONDITIONAL_BODY_PATTERN = re.compile(r"^\s*$", re.DOTALL) +_EMPTY_CONDITIONAL_BODY_PATTERN = re.compile(r'^\s*$', re.DOTALL) # Assertion macros. These are defined in base/logging.h and # testing/base/public/gunit.h. 
_CHECK_MACROS = [ - "DCHECK", - "CHECK", - "EXPECT_TRUE", - "ASSERT_TRUE", - "EXPECT_FALSE", - "ASSERT_FALSE", -] + 'DCHECK', 'CHECK', + 'EXPECT_TRUE', 'ASSERT_TRUE', + 'EXPECT_FALSE', 'ASSERT_FALSE', + ] # Replacement macros for CHECK/DCHECK/EXPECT_TRUE/EXPECT_FALSE _CHECK_REPLACEMENT = dict([(macro_var, {}) for macro_var in _CHECK_MACROS]) -for op, replacement in [ - ("==", "EQ"), - ("!=", "NE"), - (">=", "GE"), - (">", "GT"), - ("<=", "LE"), - ("<", "LT"), -]: - _CHECK_REPLACEMENT["DCHECK"][op] = "DCHECK_%s" % replacement - _CHECK_REPLACEMENT["CHECK"][op] = "CHECK_%s" % replacement - _CHECK_REPLACEMENT["EXPECT_TRUE"][op] = "EXPECT_%s" % replacement - _CHECK_REPLACEMENT["ASSERT_TRUE"][op] = "ASSERT_%s" % replacement - -for op, inv_replacement in [ - ("==", "NE"), - ("!=", "EQ"), - (">=", "LT"), - (">", "LE"), - ("<=", "GT"), - ("<", "GE"), -]: - _CHECK_REPLACEMENT["EXPECT_FALSE"][op] = "EXPECT_%s" % inv_replacement - _CHECK_REPLACEMENT["ASSERT_FALSE"][op] = "ASSERT_%s" % inv_replacement +for op, replacement in [('==', 'EQ'), ('!=', 'NE'), + ('>=', 'GE'), ('>', 'GT'), + ('<=', 'LE'), ('<', 'LT')]: + _CHECK_REPLACEMENT['DCHECK'][op] = f'DCHECK_{replacement}' + _CHECK_REPLACEMENT['CHECK'][op] = f'CHECK_{replacement}' + _CHECK_REPLACEMENT['EXPECT_TRUE'][op] = f'EXPECT_{replacement}' + _CHECK_REPLACEMENT['ASSERT_TRUE'][op] = f'ASSERT_{replacement}' + +for op, inv_replacement in [('==', 'NE'), ('!=', 'EQ'), + ('>=', 'LT'), ('>', 'LE'), + ('<=', 'GT'), ('<', 'GE')]: + _CHECK_REPLACEMENT['EXPECT_FALSE'][op] = f'EXPECT_{inv_replacement}' + _CHECK_REPLACEMENT['ASSERT_FALSE'][op] = f'ASSERT_{inv_replacement}' # Alternative tokens and their replacements. For full list, see section 2.5 # Alternative tokens [lex.digraph] in the C++ standard. @@ -590,18 +848,18 @@ # Digraphs (such as '%:') are not included here since it's a mess to # match those on a word boundary. 
_ALT_TOKEN_REPLACEMENT = { - "and": "&&", - "bitor": "|", - "or": "||", - "xor": "^", - "compl": "~", - "bitand": "&", - "and_eq": "&=", - "or_eq": "|=", - "xor_eq": "^=", - "not": "!", - "not_eq": "!=", -} + 'and': '&&', + 'bitor': '|', + 'or': '||', + 'xor': '^', + 'compl': '~', + 'bitand': '&', + 'and_eq': '&=', + 'or_eq': '|=', + 'xor_eq': '^=', + 'not': '!', + 'not_eq': '!=' + } # Compile regular expression that matches all the above keywords. The "[ =()]" # bit is meant to avoid matching these keywords outside of boolean expressions. @@ -609,38 +867,51 @@ # False positives include C-style multi-line comments and multi-line strings # but those have always been troublesome for cpplint. _ALT_TOKEN_REPLACEMENT_PATTERN = re.compile( - r"[ =()](" + ("|".join(_ALT_TOKEN_REPLACEMENT.keys())) + r")(?=[ (]|$)" -) + r'([ =()])(' + ('|'.join(_ALT_TOKEN_REPLACEMENT.keys())) + r')([ (]|$)') # These constants define types of headers for use with # _IncludeState.CheckNextIncludeOrder(). _C_SYS_HEADER = 1 _CPP_SYS_HEADER = 2 -_LIKELY_MY_HEADER = 3 -_POSSIBLE_MY_HEADER = 4 -_OTHER_HEADER = 5 +_OTHER_SYS_HEADER = 3 +_LIKELY_MY_HEADER = 4 +_POSSIBLE_MY_HEADER = 5 +_OTHER_HEADER = 6 # These constants define the current inline assembly state -_NO_ASM = 0 # Outside of inline assembly block -_INSIDE_ASM = 1 # Inside inline assembly block -_END_ASM = 2 # Last line of inline assembly block -_BLOCK_ASM = 3 # The whole block is an inline assembly block +_NO_ASM = 0 # Outside of inline assembly block +_INSIDE_ASM = 1 # Inside inline assembly block +_END_ASM = 2 # Last line of inline assembly block +_BLOCK_ASM = 3 # The whole block is an inline assembly block # Match start of assembly blocks -_MATCH_ASM = re.compile( - r"^\s*(?:asm|_asm|__asm|__asm__)" r"(?:\s+(volatile|__volatile__))?" r"\s*[{(]" -) +_MATCH_ASM = re.compile(r'^\s*(?:asm|_asm|__asm|__asm__)' + r'(?:\s+(volatile|__volatile__))?' + r'\s*[{(]') # Match strings that indicate we're working on a C (not C++) file. 
-_SEARCH_C_FILE = re.compile( - r"\b(?:LINT_C_FILE|" r"vim?:\s*.*(\s*|:)filetype=c(\s*|:|$))" -) +_SEARCH_C_FILE = re.compile(r'\b(?:LINT_C_FILE|' + r'vim?:\s*.*(\s*|:)filetype=c(\s*|:|$))') # Match string that indicates we're working on a Linux Kernel file. -_SEARCH_KERNEL_FILE = re.compile(r"\b(?:LINT_KERNEL_FILE)") - -_regexp_compile_cache = {} +_SEARCH_KERNEL_FILE = re.compile(r'\b(?:LINT_KERNEL_FILE)') + +# Commands for sed to fix the problem +_SED_FIXUPS = { + 'Remove spaces around =': r's/ = /=/', + 'Remove spaces around !=': r's/ != /!=/', + 'Remove space before ( in if (': r's/if (/if(/', + 'Remove space before ( in for (': r's/for (/for(/', + 'Remove space before ( in while (': r's/while (/while(/', + 'Remove space before ( in switch (': r's/switch (/switch(/', + 'Should have a space between // and comment': r's/\/\//\/\/ /', + 'Missing space before {': r's/\([^ ]\){/\1 {/', + 'Tab found, replace by spaces': r's/\t/ /g', + 'Line ends in whitespace. Consider deleting these extra spaces.': r's/\s*$//', + 'You don\'t need a ; after a }': r's/};/}/', + 'Missing space after ,': r's/,\([^ ]\)/, \1/g', +} # {str, set(int)}: a map from error categories to sets of linenumbers # on which those errors are expected and should be suppressed. @@ -659,893 +930,903 @@ # Files to exclude from linting. This is set by the --exclude flag. _excludes = None -# Whether to supress PrintInfo messages +# Whether to suppress all PrintInfo messages, UNRELATED to --quiet flag _quiet = False # The allowed line length of files. # This is set by --linelength flag. 
_line_length = 80 -try: - unicode -except NameError: - # -- pylint: disable=redefined-builtin - basestring = unicode = str - -try: - long -except NameError: - # -- pylint: disable=redefined-builtin - long = int - -if sys.version_info < (3,): - # -- pylint: disable=no-member - # BINARY_TYPE = str - itervalues = dict.itervalues - iteritems = dict.iteritems -else: - # BINARY_TYPE = bytes - itervalues = dict.values - iteritems = dict.items - - -def unicode_escape_decode(x): - if sys.version_info < (3,): - return codecs.unicode_escape_decode(x)[0] - else: - return x +# This allows to use different include order rule than default +_include_order = "default" +# This allows different config files to be used +_config_filename = "CPPLINT.cfg" # Treat all headers starting with 'h' equally: .h, .hpp, .hxx etc. # This is set by --headers flag. _hpp_headers = set([]) -# {str, bool}: a map from error categories to booleans which indicate if the -# category should be suppressed for every line. -_global_error_suppressions = {} +class ErrorSuppressions: + """Class to track all error suppressions for cpplint""" + class LineRange: + """Class to represent a range of line numbers for which an error is suppressed""" + def __init__(self, begin, end): + self.begin = begin + self.end = end -def ProcessHppHeadersOption(val): - global _hpp_headers - try: - _hpp_headers = {ext.strip() for ext in val.split(",")} - except ValueError: - PrintUsage("Header extensions must be comma separated list.") + def __str__(self): + return f'[{self.begin}-{self.end}]' + def __contains__(self, obj): + return self.begin <= obj <= self.end -def IsHeaderExtension(file_extension): - return file_extension in GetHeaderExtensions() + def ContainsRange(self, other): + return self.begin <= other.begin and self.end >= other.end + def __init__(self): + self._suppressions = collections.defaultdict(list) + self._open_block_suppression = None -def GetHeaderExtensions(): - if _hpp_headers: - return _hpp_headers - if 
_valid_extensions: - return {h for h in _valid_extensions if "h" in h} - return set(["h", "hh", "hpp", "hxx", "h++", "cuh"]) + def _AddSuppression(self, category, line_range): + suppressed = self._suppressions[category] + if not (suppressed and suppressed[-1].ContainsRange(line_range)): + suppressed.append(line_range) + def GetOpenBlockStart(self): + """:return: The start of the current open block or `-1` if there is not an open block""" + return self._open_block_suppression.begin if self._open_block_suppression else -1 -# The allowed extensions for file names -# This is set by --extensions flag -def GetAllExtensions(): - return GetHeaderExtensions().union( - _valid_extensions or set(["c", "cc", "cpp", "cxx", "c++", "cu"]) - ) + def AddGlobalSuppression(self, category): + """Add a suppression for `category` which is suppressed for the whole file""" + self._AddSuppression(category, self.LineRange(0, math.inf)) + def AddLineSuppression(self, category, linenum): + """Add a suppression for `category` which is suppressed only on `linenum`""" + self._AddSuppression(category, self.LineRange(linenum, linenum)) -def ProcessExtensionsOption(val): - global _valid_extensions - try: - extensions = [ext.strip() for ext in val.split(",")] - _valid_extensions = set(extensions) - except ValueError: - PrintUsage( - "Extensions should be a comma-separated list of values;" - "for example: extensions=hpp,cpp\n" - 'This could not be parsed: "%s"' % (val,) - ) + def StartBlockSuppression(self, category, linenum): + """Start a suppression block for `category` on `linenum`. inclusive""" + if self._open_block_suppression is None: + self._open_block_suppression = self.LineRange(linenum, math.inf) + self._AddSuppression(category, self._open_block_suppression) + def EndBlockSuppression(self, linenum): + """End the current block suppression on `linenum`. 
inclusive""" + if self._open_block_suppression: + self._open_block_suppression.end = linenum + self._open_block_suppression = None -def GetNonHeaderExtensions(): - return GetAllExtensions().difference(GetHeaderExtensions()) + def IsSuppressed(self, category, linenum): + """:return: `True` if `category` is suppressed for `linenum`""" + suppressed = self._suppressions[category] + self._suppressions[None] + return any(linenum in lr for lr in suppressed) + def HasOpenBlock(self): + """:return: `True` if a block suppression was started but not ended""" + return self._open_block_suppression is not None -def ParseNolintSuppressions(filename, raw_line, linenum, error): - """Updates the global list of line error-suppressions. + def Clear(self): + """Clear all current error suppressions""" + self._suppressions.clear() + self._open_block_suppression = None - Parses any NOLINT comments on the current line, updating the global - error_suppressions store. Reports an error if the NOLINT comment - was malformed. +_error_suppressions = ErrorSuppressions() - Args: - filename: str, the name of the input file. - raw_line: str, the line of input text, with comments. - linenum: int, the number of the current line. - error: function, an error handler. 
- """ - matched = Search(r"\bNOLINT(NEXTLINE)?\b(\([^)]+\))?", raw_line) - if matched: - if matched.group(1): - suppressed_line = linenum + 1 - else: - suppressed_line = linenum - category = matched.group(2) - if category in (None, "(*)"): # => "suppress all" - _error_suppressions.setdefault(None, set()).add(suppressed_line) - else: - if category.startswith("(") and category.endswith(")"): - category = category[1:-1] - if category in _ERROR_CATEGORIES: - _error_suppressions.setdefault(category, set()).add(suppressed_line) - elif category not in _LEGACY_ERROR_CATEGORIES: - error( - filename, - linenum, - "readability/nolint", - 5, - "Unknown NOLINT error category: %s" % category, - ) +def ProcessHppHeadersOption(val): + global _hpp_headers + try: + _hpp_headers = {ext.strip() for ext in val.split(',')} + except ValueError: + PrintUsage('Header extensions must be comma separated list.') + +def ProcessIncludeOrderOption(val): + if val is None or val == "default": + pass + elif val == "standardcfirst": + global _include_order + _include_order = val + else: + PrintUsage(f'Invalid includeorder value {val}. 
Expected default|standardcfirst') + +def IsHeaderExtension(file_extension): + return file_extension in GetHeaderExtensions() + +def GetHeaderExtensions(): + if _hpp_headers: + return _hpp_headers + if _valid_extensions: + return {h for h in _valid_extensions if 'h' in h} + return set(['h', 'hh', 'hpp', 'hxx', 'h++', 'cuh']) + +# The allowed extensions for file names +# This is set by --extensions flag +def GetAllExtensions(): + return GetHeaderExtensions().union(_valid_extensions or set( + ['c', 'cc', 'cpp', 'cxx', 'c++', 'cu'])) + +def ProcessExtensionsOption(val): + global _valid_extensions + try: + extensions = [ext.strip() for ext in val.split(',')] + _valid_extensions = set(extensions) + except ValueError: + PrintUsage('Extensions should be a comma-separated list of values;' + 'for example: extensions=hpp,cpp\n' + f'This could not be parsed: "{val}"') + +def GetNonHeaderExtensions(): + return GetAllExtensions().difference(GetHeaderExtensions()) +def ParseNolintSuppressions(filename, raw_line, linenum, error): + """Updates the global list of line error-suppressions. + + Parses any NOLINT comments on the current line, updating the global + error_suppressions store. Reports an error if the NOLINT comment + was malformed. + + Args: + filename: str, the name of the input file. + raw_line: str, the line of input text, with comments. + linenum: int, the number of the current line. + error: function, an error handler. 
+ """ + matched = re.search(r'\bNOLINT(NEXTLINE|BEGIN|END)?\b(\([^)]+\))?', raw_line) + if matched: + no_lint_type = matched.group(1) + if no_lint_type == 'NEXTLINE': + def ProcessCategory(category): + _error_suppressions.AddLineSuppression(category, linenum + 1) + elif no_lint_type == 'BEGIN': + if _error_suppressions.HasOpenBlock(): + error(filename, linenum, 'readability/nolint', 5, + f'NOLINT block already defined on line {_error_suppressions.GetOpenBlockStart()}') + + def ProcessCategory(category): + _error_suppressions.StartBlockSuppression(category, linenum) + elif no_lint_type == 'END': + if not _error_suppressions.HasOpenBlock(): + error(filename, linenum, 'readability/nolint', 5, 'Not in a NOLINT block') + + def ProcessCategory(category): + if category is not None: + error(filename, linenum, 'readability/nolint', 5, + f'NOLINT categories not supported in block END: {category}') + _error_suppressions.EndBlockSuppression(linenum) + else: + def ProcessCategory(category): + _error_suppressions.AddLineSuppression(category, linenum) + categories = matched.group(2) + if categories in (None, '(*)'): # => "suppress all" + ProcessCategory(None) + elif categories.startswith('(') and categories.endswith(')'): + for category in set(map(lambda c: c.strip(), categories[1:-1].split(','))): + if category in _ERROR_CATEGORIES: + ProcessCategory(category) + elif any(c for c in _OTHER_NOLINT_CATEGORY_PREFIXES if category.startswith(c)): + # Ignore any categories from other tools. + pass + elif category not in _LEGACY_ERROR_CATEGORIES: + error(filename, linenum, 'readability/nolint', 5, + f'Unknown NOLINT error category: {category}') def ProcessGlobalSuppresions(lines): - """Updates the list of global error suppressions. + """Deprecated; use ProcessGlobalSuppressions.""" + ProcessGlobalSuppressions(lines) - Parses any lint directives in the file that have global effect. +def ProcessGlobalSuppressions(lines): + """Updates the list of global error suppressions. 
- Args: - lines: An array of strings, each representing a line of the file, with the - last element being empty if the file is terminated with a newline. - """ - for line in lines: - if _SEARCH_C_FILE.search(line): - for category in _DEFAULT_C_SUPPRESSED_CATEGORIES: - _global_error_suppressions[category] = True - if _SEARCH_KERNEL_FILE.search(line): - for category in _DEFAULT_KERNEL_SUPPRESSED_CATEGORIES: - _global_error_suppressions[category] = True + Parses any lint directives in the file that have global effect. + + Args: + lines: An array of strings, each representing a line of the file, with the + last element being empty if the file is terminated with a newline. + """ + for line in lines: + if _SEARCH_C_FILE.search(line): + for category in _DEFAULT_C_SUPPRESSED_CATEGORIES: + _error_suppressions.AddGlobalSuppression(category) + if _SEARCH_KERNEL_FILE.search(line): + for category in _DEFAULT_KERNEL_SUPPRESSED_CATEGORIES: + _error_suppressions.AddGlobalSuppression(category) def ResetNolintSuppressions(): - """Resets the set of NOLINT suppressions to empty.""" - _error_suppressions.clear() - _global_error_suppressions.clear() + """Resets the set of NOLINT suppressions to empty.""" + _error_suppressions.Clear() def IsErrorSuppressedByNolint(category, linenum): - """Returns true if the specified error category is suppressed on this line. + """Returns true if the specified error category is suppressed on this line. + + Consults the global error_suppressions map populated by + ParseNolintSuppressions/ProcessGlobalSuppressions/ResetNolintSuppressions. + + Args: + category: str, the category of the error. + linenum: int, the current line number. + Returns: + bool, True iff the error should be suppressed due to a NOLINT comment, + block suppression or global suppression. + """ + return _error_suppressions.IsSuppressed(category, linenum) - Consults the global error_suppressions map populated by - ParseNolintSuppressions/ProcessGlobalSuppresions/ResetNolintSuppressions. 
+ +def _IsSourceExtension(s): + """File extension (excluding dot) matches a source file extension.""" + return s in GetNonHeaderExtensions() + + +class _IncludeState(object): + """Tracks line numbers for includes, and the order in which includes appear. + + include_list contains list of lists of (header, line number) pairs. + It's a lists of lists rather than just one flat list to make it + easier to update across preprocessor boundaries. + + Call CheckNextIncludeOrder() once for each header in the file, passing + in the type constants defined above. Calls in an illegal order will + raise an _IncludeError with an appropriate error message. + + """ + # self._section will move monotonically through this set. If it ever + # needs to move backwards, CheckNextIncludeOrder will raise an error. + _INITIAL_SECTION = 0 + _MY_H_SECTION = 1 + _C_SECTION = 2 + _CPP_SECTION = 3 + _OTHER_SYS_SECTION = 4 + _OTHER_H_SECTION = 5 + + _TYPE_NAMES = { + _C_SYS_HEADER: 'C system header', + _CPP_SYS_HEADER: 'C++ system header', + _OTHER_SYS_HEADER: 'other system header', + _LIKELY_MY_HEADER: 'header this file implements', + _POSSIBLE_MY_HEADER: 'header this file may implement', + _OTHER_HEADER: 'other header', + } + _SECTION_NAMES = { + _INITIAL_SECTION: "... nothing. (This can't be an error.)", + _MY_H_SECTION: 'a header this file implements', + _C_SECTION: 'C system header', + _CPP_SECTION: 'C++ system header', + _OTHER_SYS_SECTION: 'other system header', + _OTHER_H_SECTION: 'other header', + } + + def __init__(self): + self.include_list = [[]] + self._section = None + self._last_header = None + self.ResetSection('') + + def FindHeader(self, header): + """Check if a header has already been included. Args: - category: str, the category of the error. - linenum: int, the current line number. + header: header to check. Returns: - bool, True iff the error should be suppressed due to a NOLINT comment or - global suppression. 
+ Line number of previous occurrence, or -1 if the header has not + been seen before. """ - return ( - _global_error_suppressions.get(category, False) - or linenum in _error_suppressions.get(category, set()) - or linenum in _error_suppressions.get(None, set()) - ) + for section_list in self.include_list: + for f in section_list: + if f[0] == header: + return f[1] + return -1 + def ResetSection(self, directive): + """Reset section checking for preprocessor directive. + + Args: + directive: preprocessor directive (e.g. "if", "else"). + """ + # The name of the current section. + self._section = self._INITIAL_SECTION + # The path of last found header. + self._last_header = '' -def Match(pattern, s): - """Matches the string with the pattern, caching the compiled regexp.""" - # The regexp compilation caching is inlined in both Match and Search for - # performance reasons; factoring it out into a separate function turns out - # to be noticeably expensive. - if pattern not in _regexp_compile_cache: - _regexp_compile_cache[pattern] = re.compile(pattern) - return _regexp_compile_cache[pattern].match(s) + # Update list of includes. Note that we never pop from the + # include list. + if directive in ('if', 'ifdef', 'ifndef'): + self.include_list.append([]) + elif directive in ('else', 'elif'): + self.include_list[-1] = [] + def SetLastHeader(self, header_path): + self._last_header = header_path -def ReplaceAll(pattern, rep, s): - """Replaces instances of pattern in a string with a replacement. + def CanonicalizeAlphabeticalOrder(self, header_path): + """Returns a path canonicalized for alphabetical comparison. - The compiled regex is kept in a cache shared by Match and Search. + - replaces "-" with "_" so they both cmp the same. + - removes '-inl' since we don't require them to be after the main header. + - lowercase everything, just in case. Args: - pattern: regex pattern - rep: replacement text - s: search string + header_path: Path to be canonicalized. 
Returns: - string with replacements made (or original string if no replacements) + Canonicalized path. """ - if pattern not in _regexp_compile_cache: - _regexp_compile_cache[pattern] = re.compile(pattern) - return _regexp_compile_cache[pattern].sub(rep, s) - + return header_path.replace('-inl.h', '.h').replace('-', '_').lower() -def Search(pattern, s): - """Searches the string for the pattern, caching the compiled regexp.""" - if pattern not in _regexp_compile_cache: - _regexp_compile_cache[pattern] = re.compile(pattern) - return _regexp_compile_cache[pattern].search(s) + def IsInAlphabeticalOrder(self, clean_lines, linenum, header_path): + """Check if a header is in alphabetical order with the previous header. + Args: + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + header_path: Canonicalized header to be checked. -def _IsSourceExtension(s): - """File extension (excluding dot) matches a source file extension.""" - return s in GetNonHeaderExtensions() + Returns: + Returns true if the header is in alphabetical order. + """ + # If previous section is different from current section, _last_header will + # be reset to empty string, so it's always less than current header. + # + # If previous line was a blank line, assume that the headers are + # intentionally sorted the way they are. + if (self._last_header > header_path and + re.match(r'^\s*#\s*include\b', clean_lines.elided[linenum - 1])): + return False + return True + def CheckNextIncludeOrder(self, header_type): + """Returns a non-empty error message if the next header is out of order. -class _IncludeState(object): - """Tracks line numbers for includes, and the order in which includes appear. + This function also updates the internal state to be ready to check + the next include. - include_list contains list of lists of (header, line number) pairs. 
- It's a lists of lists rather than just one flat list to make it - easier to update across preprocessor boundaries. + Args: + header_type: One of the _XXX_HEADER constants defined above. - Call CheckNextIncludeOrder() once for each header in the file, passing - in the type constants defined above. Calls in an illegal order will - raise an _IncludeError with an appropriate error message. + Returns: + The empty string if the header is in the right order, or an + error message describing what's wrong. """ + error_message = (f'Found {self._TYPE_NAMES[header_type]}' + f' after {self._SECTION_NAMES[self._section]}') + + last_section = self._section + + if header_type == _C_SYS_HEADER: + if self._section <= self._C_SECTION: + self._section = self._C_SECTION + else: + self._last_header = '' + return error_message + elif header_type == _CPP_SYS_HEADER: + if self._section <= self._CPP_SECTION: + self._section = self._CPP_SECTION + else: + self._last_header = '' + return error_message + elif header_type == _OTHER_SYS_HEADER: + if self._section <= self._OTHER_SYS_SECTION: + self._section = self._OTHER_SYS_SECTION + else: + self._last_header = '' + return error_message + elif header_type == _LIKELY_MY_HEADER: + if self._section <= self._MY_H_SECTION: + self._section = self._MY_H_SECTION + else: + self._section = self._OTHER_H_SECTION + elif header_type == _POSSIBLE_MY_HEADER: + if self._section <= self._MY_H_SECTION: + self._section = self._MY_H_SECTION + else: + # This will always be the fallback because we're not sure + # enough that the header is associated with this file. + self._section = self._OTHER_H_SECTION + else: + assert header_type == _OTHER_HEADER + self._section = self._OTHER_H_SECTION - # self._section will move monotonically through this set. If it ever - # needs to move backwards, CheckNextIncludeOrder will raise an error. 
- _INITIAL_SECTION = 0 - _MY_H_SECTION = 1 - _C_SECTION = 2 - _CPP_SECTION = 3 - _OTHER_H_SECTION = 4 - - _TYPE_NAMES = { - _C_SYS_HEADER: "C system header", - _CPP_SYS_HEADER: "C++ system header", - _LIKELY_MY_HEADER: "header this file implements", - _POSSIBLE_MY_HEADER: "header this file may implement", - _OTHER_HEADER: "other header", - } - _SECTION_NAMES = { - _INITIAL_SECTION: "... nothing. (This can't be an error.)", - _MY_H_SECTION: "a header this file implements", - _C_SECTION: "C system header", - _CPP_SECTION: "C++ system header", - _OTHER_H_SECTION: "other header", - } + if last_section != self._section: + self._last_header = '' - def __init__(self): - self.include_list = [[]] - self._section = None - self._last_header = None - self.ResetSection("") - - def FindHeader(self, header): - """Check if a header has already been included. - - Args: - header: header to check. - Returns: - Line number of previous occurrence, or -1 if the header has not - been seen before. - """ - for section_list in self.include_list: - for f in section_list: - if f[0] == header: - return f[1] - return -1 - - def ResetSection(self, directive): - """Reset section checking for preprocessor directive. - - Args: - directive: preprocessor directive (e.g. "if", "else"). - """ - # The name of the current section. - self._section = self._INITIAL_SECTION - # The path of last found header. - self._last_header = "" - - # Update list of includes. Note that we never pop from the - # include list. - if directive in ("if", "ifdef", "ifndef"): - self.include_list.append([]) - elif directive in ("else", "elif"): - self.include_list[-1] = [] - - def SetLastHeader(self, header_path): - self._last_header = header_path - - def CanonicalizeAlphabeticalOrder(self, header_path): - """Returns a path canonicalized for alphabetical comparison. - - - replaces "-" with "_" so they both cmp the same. - - removes '-inl' since we don't require them to be after the main header. 
- - lowercase everything, just in case. - - Args: - header_path: Path to be canonicalized. - - Returns: - Canonicalized path. - """ - return header_path.replace("-inl.h", ".h").replace("-", "_").lower() - - def IsInAlphabeticalOrder(self, clean_lines, linenum, header_path): - """Check if a header is in alphabetical order with the previous header. - - Args: - clean_lines: A CleansedLines instance containing the file. - linenum: The number of the line to check. - header_path: Canonicalized header to be checked. - - Returns: - Returns true if the header is in alphabetical order. - """ - # If previous section is different from current section, _last_header will - # be reset to empty string, so it's always less than current header. - # - # If previous line was a blank line, assume that the headers are - # intentionally sorted the way they are. - if self._last_header > header_path and Match( - r"^\s*#\s*include\b", clean_lines.elided[linenum - 1] - ): - return False - return True + return '' - def CheckNextIncludeOrder(self, header_type): - """Returns a non-empty error message if the next header is out of order. - - This function also updates the internal state to be ready to check - the next include. - - Args: - header_type: One of the _XXX_HEADER constants defined above. - - Returns: - The empty string if the header is in the right order, or an - error message describing what's wrong. 
- - """ - error_message = "Found %s after %s" % ( - self._TYPE_NAMES[header_type], - self._SECTION_NAMES[self._section], - ) - - last_section = self._section - - if header_type == _C_SYS_HEADER: - if self._section <= self._C_SECTION: - self._section = self._C_SECTION - else: - self._last_header = "" - return error_message - elif header_type == _CPP_SYS_HEADER: - if self._section <= self._CPP_SECTION: - self._section = self._CPP_SECTION - else: - self._last_header = "" - return error_message - elif header_type == _LIKELY_MY_HEADER: - if self._section <= self._MY_H_SECTION: - self._section = self._MY_H_SECTION - else: - self._section = self._OTHER_H_SECTION - elif header_type == _POSSIBLE_MY_HEADER: - if self._section <= self._MY_H_SECTION: - self._section = self._MY_H_SECTION - else: - # This will always be the fallback because we're not sure - # enough that the header is associated with this file. - self._section = self._OTHER_H_SECTION - else: - assert header_type == _OTHER_HEADER - self._section = self._OTHER_H_SECTION - if last_section != self._section: - self._last_header = "" +class _CppLintState(object): + """Maintains module-wide state..""" + + def __init__(self): + self.verbose_level = 1 # global setting. + self.error_count = 0 # global count of reported errors + # filters to apply when emitting error messages + self.filters = _DEFAULT_FILTERS[:] + # backup of filter list. Used to restore the state after each file. + self._filters_backup = self.filters[:] + self.counting = 'total' # In what way are we counting errors? + self.errors_by_category = {} # string to int dict storing error counts + self.quiet = False # Suppress non-error messages? 
+ + # output format: + # "emacs" - format that emacs can parse (default) + # "eclipse" - format that eclipse can parse + # "vs7" - format that Microsoft Visual Studio 7 can parse + # "junit" - format that Jenkins, Bamboo, etc can parse + # "sed" - returns a gnu sed command to fix the problem + # "gsed" - like sed, but names the command gsed, e.g. for macOS homebrew users + self.output_format = 'emacs' + + # For JUnit output, save errors and failures until the end so that they + # can be written into the XML + self._junit_errors = [] + self._junit_failures = [] + + def SetOutputFormat(self, output_format): + """Sets the output format for errors.""" + self.output_format = output_format + + def SetQuiet(self, quiet): + """Sets the module's quiet settings, and returns the previous setting.""" + last_quiet = self.quiet + self.quiet = quiet + return last_quiet + + def SetVerboseLevel(self, level): + """Sets the module's verbosity, and returns the previous setting.""" + last_verbose_level = self.verbose_level + self.verbose_level = level + return last_verbose_level - return "" + def SetCountingStyle(self, counting_style): + """Sets the module's counting options.""" + self.counting = counting_style + def SetFilters(self, filters): + """Sets the error-message filters. -class _CppLintState(object): - """Maintains module-wide state..""" - - def __init__(self): - self.verbose_level = 1 # global setting. - self.error_count = 0 # global count of reported errors - # filters to apply when emitting error messages - self.filters = _DEFAULT_FILTERS[:] - # backup of filter list. Used to restore the state after each file. - self._filters_backup = self.filters[:] - self.counting = "total" # In what way are we counting errors? - self.errors_by_category = {} # string to int dict storing error counts - self.quiet = False # Suppress non-error messagess? 
- - # output format: - # "emacs" - format that emacs can parse (default) - # "eclipse" - format that eclipse can parse - # "vs7" - format that Microsoft Visual Studio 7 can parse - # "junit" - format that Jenkins, Bamboo, etc can parse - self.output_format = "emacs" - - # For JUnit output, save errors and failures until the end so that they - # can be written into the XML - self._junit_errors = [] - self._junit_failures = [] - - def SetOutputFormat(self, output_format): - """Sets the output format for errors.""" - self.output_format = output_format - - def SetQuiet(self, quiet): - """Sets the module's quiet settings, and returns the previous setting.""" - last_quiet = self.quiet - self.quiet = quiet - return last_quiet - - def SetVerboseLevel(self, level): - """Sets the module's verbosity, and returns the previous setting.""" - last_verbose_level = self.verbose_level - self.verbose_level = level - return last_verbose_level - - def SetCountingStyle(self, counting_style): - """Sets the module's counting options.""" - self.counting = counting_style - - def SetFilters(self, filters): - """Sets the error-message filters. - - These filters are applied when deciding whether to emit a given - error message. - - Args: - filters: A string of comma-separated filters (eg "+whitespace/indent"). - Each filter should start with + or -; else we die. - - Raises: - ValueError: The comma-separated filters did not all start with '+' or '-'. - E.g. "-,+whitespace,-whitespace/indent,whitespace/badfilter" - """ - # Default filters always have less priority than the flag ones. 
- self.filters = _DEFAULT_FILTERS[:] - self.AddFilters(filters) - - def AddFilters(self, filters): - """Adds more filters to the existing list of error-message filters.""" - for filt in filters.split(","): - clean_filt = filt.strip() - if clean_filt: - self.filters.append(clean_filt) - for filt in self.filters: - if not (filt.startswith("+") or filt.startswith("-")): - raise ValueError( - "Every filter in --filters must start with + or -" - " (%s does not)" % filt - ) - - def BackupFilters(self): - """Saves the current filter list to backup storage.""" - self._filters_backup = self.filters[:] - - def RestoreFilters(self): - """Restores filters previously backed up.""" - self.filters = self._filters_backup[:] - - def ResetErrorCounts(self): - """Sets the module's error statistic back to zero.""" - self.error_count = 0 - self.errors_by_category = {} - - def IncrementErrorCount(self, category): - """Bumps the module's error statistic.""" - self.error_count += 1 - if self.counting in ("toplevel", "detailed"): - if self.counting != "detailed": - category = category.split("/")[0] - if category not in self.errors_by_category: - self.errors_by_category[category] = 0 - self.errors_by_category[category] += 1 - - def PrintErrorCounts(self): - """Print a summary of errors by category, and the total.""" - for category, count in sorted(iteritems(self.errors_by_category)): - self.PrintInfo("Category '%s' errors found: %d\n" % (category, count)) - if self.error_count > 0: - self.PrintInfo("Total errors found: %d\n" % self.error_count) - - def PrintInfo(self, message): - if not _quiet and self.output_format != "junit": - sys.stdout.write(message) - - def PrintError(self, message): - if self.output_format == "junit": - self._junit_errors.append(message) - else: - sys.stderr.write(message) + These filters are applied when deciding whether to emit a given + error message. + + Args: + filters: A string of comma-separated filters (eg "+whitespace/indent"). 
+ Each filter should start with + or -; else we die. - def AddJUnitFailure(self, filename, linenum, message, category, confidence): - self._junit_failures.append((filename, linenum, message, category, confidence)) + Raises: + ValueError: The comma-separated filters did not all start with '+' or '-'. + E.g. "-,+whitespace,-whitespace/indent,whitespace/badfilter" + """ + # Default filters always have less priority than the flag ones. + self.filters = _DEFAULT_FILTERS[:] + self.AddFilters(filters) + + def AddFilters(self, filters): + """ Adds more filters to the existing list of error-message filters. """ + for filt in filters.split(','): + clean_filt = filt.strip() + if clean_filt: + self.filters.append(clean_filt) + for filt in self.filters: + if not (filt.startswith('+') or filt.startswith('-')): + raise ValueError('Every filter in --filters must start with + or -' + f' ({filt} does not)') + + def BackupFilters(self): + """ Saves the current filter list to backup storage.""" + self._filters_backup = self.filters[:] + + def RestoreFilters(self): + """ Restores filters previously backed up.""" + self.filters = self._filters_backup[:] + + def ResetErrorCounts(self): + """Sets the module's error statistic back to zero.""" + self.error_count = 0 + self.errors_by_category = {} + + def IncrementErrorCount(self, category): + """Bumps the module's error statistic.""" + self.error_count += 1 + if self.counting in ('toplevel', 'detailed'): + if self.counting != 'detailed': + category = category.split('/')[0] + if category not in self.errors_by_category: + self.errors_by_category[category] = 0 + self.errors_by_category[category] += 1 + + def PrintErrorCounts(self): + """Print a summary of errors by category, and the total.""" + for category, count in sorted(dict.items(self.errors_by_category)): + self.PrintInfo(f'Category \'{category}\' errors found: {count}\n') + if self.error_count > 0: + self.PrintInfo(f'Total errors found: {self.error_count}\n') + + def PrintInfo(self, 
message): + # _quiet does not represent --quiet flag. + # Hide infos from stdout to keep stdout pure for machine consumption + if not _quiet and self.output_format not in _MACHINE_OUTPUTS: + sys.stdout.write(message) + + def PrintError(self, message): + if self.output_format == 'junit': + self._junit_errors.append(message) + else: + sys.stderr.write(message) - def FormatJUnitXML(self): - num_errors = len(self._junit_errors) - num_failures = len(self._junit_failures) + def AddJUnitFailure(self, filename, linenum, message, category, confidence): + self._junit_failures.append((filename, linenum, message, category, + confidence)) - testsuite = xml.etree.ElementTree.Element("testsuite") - testsuite.attrib["errors"] = str(num_errors) - testsuite.attrib["failures"] = str(num_failures) - testsuite.attrib["name"] = "cpplint" + def FormatJUnitXML(self): + num_errors = len(self._junit_errors) + num_failures = len(self._junit_failures) - if num_errors == 0 and num_failures == 0: - testsuite.attrib["tests"] = str(1) - xml.etree.ElementTree.SubElement(testsuite, "testcase", name="passed") + testsuite = xml.etree.ElementTree.Element('testsuite') + testsuite.attrib['errors'] = str(num_errors) + testsuite.attrib['failures'] = str(num_failures) + testsuite.attrib['name'] = 'cpplint' - else: - testsuite.attrib["tests"] = str(num_errors + num_failures) - if num_errors > 0: - testcase = xml.etree.ElementTree.SubElement(testsuite, "testcase") - testcase.attrib["name"] = "errors" - error = xml.etree.ElementTree.SubElement(testcase, "error") - error.text = "\n".join(self._junit_errors) - if num_failures > 0: - # Group failures by file - failed_file_order = [] - failures_by_file = {} - for failure in self._junit_failures: - failed_file = failure[0] - if failed_file not in failed_file_order: - failed_file_order.append(failed_file) - failures_by_file[failed_file] = [] - failures_by_file[failed_file].append(failure) - # Create a testcase for each file - for failed_file in failed_file_order: - 
failures = failures_by_file[failed_file] - testcase = xml.etree.ElementTree.SubElement(testsuite, "testcase") - testcase.attrib["name"] = failed_file - failure = xml.etree.ElementTree.SubElement(testcase, "failure") - template = "{0}: {1} [{2}] [{3}]" - texts = [template.format(f[1], f[2], f[3], f[4]) for f in failures] - failure.text = "\n".join(texts) - - xml_decl = '\n' - return xml_decl + xml.etree.ElementTree.tostring(testsuite, "utf-8").decode( - "utf-8" - ) + if num_errors == 0 and num_failures == 0: + testsuite.attrib['tests'] = str(1) + xml.etree.ElementTree.SubElement(testsuite, 'testcase', name='passed') + + else: + testsuite.attrib['tests'] = str(num_errors + num_failures) + if num_errors > 0: + testcase = xml.etree.ElementTree.SubElement(testsuite, 'testcase') + testcase.attrib['name'] = 'errors' + error = xml.etree.ElementTree.SubElement(testcase, 'error') + error.text = '\n'.join(self._junit_errors) + if num_failures > 0: + # Group failures by file + failed_file_order = [] + failures_by_file = {} + for failure in self._junit_failures: + failed_file = failure[0] + if failed_file not in failed_file_order: + failed_file_order.append(failed_file) + failures_by_file[failed_file] = [] + failures_by_file[failed_file].append(failure) + # Create a testcase for each file + for failed_file in failed_file_order: + failures = failures_by_file[failed_file] + testcase = xml.etree.ElementTree.SubElement(testsuite, 'testcase') + testcase.attrib['name'] = failed_file + failure = xml.etree.ElementTree.SubElement(testcase, 'failure') + template = '{0}: {1} [{2}] [{3}]' + texts = [template.format(f[1], f[2], f[3], f[4]) for f in failures] + failure.text = '\n'.join(texts) + + xml_decl = '\n' + return xml_decl + xml.etree.ElementTree.tostring(testsuite, 'utf-8').decode('utf-8') _cpplint_state = _CppLintState() def _OutputFormat(): - """Gets the module's output format.""" - return _cpplint_state.output_format + """Gets the module's output format.""" + return 
_cpplint_state.output_format def _SetOutputFormat(output_format): - """Sets the module's output format.""" - _cpplint_state.SetOutputFormat(output_format) - + """Sets the module's output format.""" + _cpplint_state.SetOutputFormat(output_format) def _Quiet(): - """Return's the module's quiet setting.""" - return _cpplint_state.quiet - + """Return's the module's quiet setting.""" + return _cpplint_state.quiet def _SetQuiet(quiet): - """Set the module's quiet status, and return previous setting.""" - return _cpplint_state.SetQuiet(quiet) + """Set the module's quiet status, and return previous setting.""" + return _cpplint_state.SetQuiet(quiet) def _VerboseLevel(): - """Returns the module's verbosity setting.""" - return _cpplint_state.verbose_level + """Returns the module's verbosity setting.""" + return _cpplint_state.verbose_level def _SetVerboseLevel(level): - """Sets the module's verbosity, and returns the previous setting.""" - return _cpplint_state.SetVerboseLevel(level) + """Sets the module's verbosity, and returns the previous setting.""" + return _cpplint_state.SetVerboseLevel(level) def _SetCountingStyle(level): - """Sets the module's counting options.""" - _cpplint_state.SetCountingStyle(level) + """Sets the module's counting options.""" + _cpplint_state.SetCountingStyle(level) def _Filters(): - """Returns the module's list of output filters, as a list.""" - return _cpplint_state.filters + """Returns the module's list of output filters, as a list.""" + return _cpplint_state.filters def _SetFilters(filters): - """Sets the module's error-message filters. - - These filters are applied when deciding whether to emit a given - error message. + """Sets the module's error-message filters. - Args: - filters: A string of comma-separated filters (eg "whitespace/indent"). - Each filter should start with + or -; else we die. - """ - _cpplint_state.SetFilters(filters) + These filters are applied when deciding whether to emit a given + error message. 
+ Args: + filters: A string of comma-separated filters (eg "whitespace/indent"). + Each filter should start with + or -; else we die. + """ + _cpplint_state.SetFilters(filters) def _AddFilters(filters): - """Adds more filter overrides. + """Adds more filter overrides. + + Unlike _SetFilters, this function does not reset the current list of filters + available. + + Args: + filters: A string of comma-separated filters (eg "whitespace/indent"). + Each filter should start with + or -; else we die. + """ + _cpplint_state.AddFilters(filters) + +def _BackupFilters(): + """ Saves the current filter list to backup storage.""" + _cpplint_state.BackupFilters() - Unlike _SetFilters, this function does not reset the current list of filters - available. +def _RestoreFilters(): + """ Restores filters previously backed up.""" + _cpplint_state.RestoreFilters() + +class _FunctionState(object): + """Tracks current function name and the number of lines in its body.""" + + _NORMAL_TRIGGER = 250 # for --v=0, 500 for --v=1, etc. + _TEST_TRIGGER = 400 # about 50% more than _NORMAL_TRIGGER. + + def __init__(self): + self.in_a_function = False + self.lines_in_function = 0 + self.current_function = '' + + def Begin(self, function_name): + """Start analyzing function body. Args: - filters: A string of comma-separated filters (eg "whitespace/indent"). - Each filter should start with + or -; else we die. + function_name: The name of the function being tracked. """ - _cpplint_state.AddFilters(filters) + self.in_a_function = True + self.lines_in_function = 0 + self.current_function = function_name + def Count(self): + """Count line in current function body.""" + if self.in_a_function: + self.lines_in_function += 1 -def _BackupFilters(): - """Saves the current filter list to backup storage.""" - _cpplint_state.BackupFilters() + def Check(self, error, filename, linenum): + """Report if too many lines in function body. + Args: + error: The function to call with any errors found. 
+ filename: The name of the current file. + linenum: The number of the line to check. + """ + if not self.in_a_function: + return -def _RestoreFilters(): - """Restores filters previously backed up.""" - _cpplint_state.RestoreFilters() + if re.match(r'T(EST|est)', self.current_function): + base_trigger = self._TEST_TRIGGER + else: + base_trigger = self._NORMAL_TRIGGER + trigger = base_trigger * 2**_VerboseLevel() + if self.lines_in_function > trigger: + error_level = int(math.log(self.lines_in_function / base_trigger, 2)) + # 50 => 0, 100 => 1, 200 => 2, 400 => 3, 800 => 4, 1600 => 5, ... + if error_level > 5: + error_level = 5 + error(filename, linenum, 'readability/fn_size', error_level, + 'Small and focused functions are preferred:' + f' {self.current_function} has {self.lines_in_function} non-comment lines' + f' (error triggered by exceeding {trigger} lines).') -class _FunctionState(object): - """Tracks current function name and the number of lines in its body.""" - - _NORMAL_TRIGGER = 250 # for --v=0, 500 for --v=1, etc. - _TEST_TRIGGER = 400 # about 50% more than _NORMAL_TRIGGER. - - def __init__(self): - self.in_a_function = False - self.lines_in_function = 0 - self.current_function = "" - - def Begin(self, function_name): - """Start analyzing function body. - - Args: - function_name: The name of the function being tracked. - """ - self.in_a_function = True - self.lines_in_function = 0 - self.current_function = function_name - - def Count(self): - """Count line in current function body.""" - if self.in_a_function: - self.lines_in_function += 1 - - def Check(self, error, filename, linenum): - """Report if too many lines in function body. - - Args: - error: The function to call with any errors found. - filename: The name of the current file. - linenum: The number of the line to check. 
- """ - if not self.in_a_function: - return - - if Match(r"T(EST|est)", self.current_function): - base_trigger = self._TEST_TRIGGER - else: - base_trigger = self._NORMAL_TRIGGER - trigger = base_trigger * 2 ** _VerboseLevel() - - if self.lines_in_function > trigger: - error_level = int(math.log(self.lines_in_function / base_trigger, 2)) - # 50 => 0, 100 => 1, 200 => 2, 400 => 3, 800 => 4, 1600 => 5, ... - if error_level > 5: - error_level = 5 - error( - filename, - linenum, - "readability/fn_size", - error_level, - "Small and focused functions are preferred:" - " %s has %d non-comment lines" - " (error triggered by exceeding %d lines)." - % (self.current_function, self.lines_in_function, trigger), - ) - - def End(self): - """Stop analyzing function body.""" - self.in_a_function = False + def End(self): + """Stop analyzing function body.""" + self.in_a_function = False class _IncludeError(Exception): - """Indicates a problem with the include order in a file.""" - - pass + """Indicates a problem with the include order in a file.""" + pass class FileInfo(object): - """Provides utility functions for filenames. + """Provides utility functions for filenames. + + FileInfo provides easy access to the components of a file's path + relative to the project root. + """ + + def __init__(self, filename): + self._filename = filename + + def FullName(self): + """Make Windows paths like Unix.""" + return os.path.abspath(self._filename).replace('\\', '/') + + def RepositoryName(self): + r"""FullName after removing the local path to the repository. + + If we have a real absolute path name here we can try to do something smart: + detecting the root of the checkout and truncating /path/to/checkout from + the name so that we get header guards that don't include things like + "C:\\Documents and Settings\\..." or "/home/username/..." in them and thus + people on different computers who have checked the source out to different + locations won't see bogus errors. 
+ """ + fullname = self.FullName() + + if os.path.exists(fullname): + project_dir = os.path.dirname(fullname) + + # If the user specified a repository path, it exists, and the file is + # contained in it, use the specified repository path + if _repository: + repo = FileInfo(_repository).FullName() + root_dir = project_dir + while os.path.exists(root_dir): + # allow case insensitive compare on Windows + if os.path.normcase(root_dir) == os.path.normcase(repo): + return os.path.relpath(fullname, root_dir).replace('\\', '/') + one_up_dir = os.path.dirname(root_dir) + if one_up_dir == root_dir: + break + root_dir = one_up_dir + + if os.path.exists(os.path.join(project_dir, ".svn")): + # If there's a .svn file in the current directory, we recursively look + # up the directory tree for the top of the SVN checkout + root_dir = project_dir + one_up_dir = os.path.dirname(root_dir) + while os.path.exists(os.path.join(one_up_dir, ".svn")): + root_dir = os.path.dirname(root_dir) + one_up_dir = os.path.dirname(one_up_dir) + + prefix = os.path.commonprefix([root_dir, project_dir]) + return fullname[len(prefix) + 1:] + + # Not SVN <= 1.6? Try to find a git, hg, or svn top level directory by + # searching up from the current path. + root_dir = current_dir = os.path.dirname(fullname) + while current_dir != os.path.dirname(current_dir): + if (os.path.exists(os.path.join(current_dir, ".git")) or + os.path.exists(os.path.join(current_dir, ".hg")) or + os.path.exists(os.path.join(current_dir, ".svn"))): + root_dir = current_dir + break + current_dir = os.path.dirname(current_dir) + + if (os.path.exists(os.path.join(root_dir, ".git")) or + os.path.exists(os.path.join(root_dir, ".hg")) or + os.path.exists(os.path.join(root_dir, ".svn"))): + prefix = os.path.commonprefix([root_dir, project_dir]) + return fullname[len(prefix) + 1:] + + # Don't know what to do; header guard warnings may be wrong... 
+ return fullname + + def Split(self): + """Splits the file into the directory, basename, and extension. + + For 'chrome/browser/browser.cc', Split() would + return ('chrome/browser', 'browser', '.cc') - FileInfo provides easy access to the components of a file's path - relative to the project root. + Returns: + A tuple of (directory, basename, extension). """ - def __init__(self, filename): - self._filename = filename - - def FullName(self): - """Make Windows paths like Unix.""" - return os.path.abspath(self._filename).replace("\\", "/") - - def RepositoryName(self): - r"""FullName after removing the local path to the repository. - - If we have a real absolute path name here we can try to do something smart: - detecting the root of the checkout and truncating /path/to/checkout from - the name so that we get header guards that don't include things like - "C:\\Documents and Settings\\..." or "/home/username/..." in them and thus - people on different computers who have checked the source out to different - locations won't see bogus errors. 
- """ - fullname = self.FullName() - - if os.path.exists(fullname): - project_dir = os.path.dirname(fullname) - - # If the user specified a repository path, it exists, and the file is - # contained in it, use the specified repository path - if _repository: - repo = FileInfo(_repository).FullName() - root_dir = project_dir - while os.path.exists(root_dir): - # allow case insensitive compare on Windows - if os.path.normcase(root_dir) == os.path.normcase(repo): - return os.path.relpath(fullname, root_dir).replace("\\", "/") - one_up_dir = os.path.dirname(root_dir) - if one_up_dir == root_dir: - break - root_dir = one_up_dir - - if os.path.exists(os.path.join(project_dir, ".svn")): - # If there's a .svn file in the current directory, we recursively look - # up the directory tree for the top of the SVN checkout - root_dir = project_dir - one_up_dir = os.path.dirname(root_dir) - while os.path.exists(os.path.join(one_up_dir, ".svn")): - root_dir = os.path.dirname(root_dir) - one_up_dir = os.path.dirname(one_up_dir) - - prefix = os.path.commonprefix([root_dir, project_dir]) - return fullname[len(prefix) + 1 :] - - # Not SVN <= 1.6? Try to find a git, hg, or svn top level directory by - # searching up from the current path. - root_dir = current_dir = os.path.dirname(fullname) - while current_dir != os.path.dirname(current_dir): - if ( - os.path.exists(os.path.join(current_dir, ".git")) - or os.path.exists(os.path.join(current_dir, ".hg")) - or os.path.exists(os.path.join(current_dir, ".svn")) - ): - root_dir = current_dir - current_dir = os.path.dirname(current_dir) - - if ( - os.path.exists(os.path.join(root_dir, ".git")) - or os.path.exists(os.path.join(root_dir, ".hg")) - or os.path.exists(os.path.join(root_dir, ".svn")) - ): - prefix = os.path.commonprefix([root_dir, project_dir]) - return fullname[len(prefix) + 1 :] - - # Don't know what to do; header guard warnings may be wrong... 
- return fullname - - def Split(self): - """Splits the file into the directory, basename, and extension. - - For 'chrome/browser/browser.cc', Split() would - return ('chrome/browser', 'browser', '.cc') - - Returns: - A tuple of (directory, basename, extension). - """ - - googlename = self.RepositoryName() - project, rest = os.path.split(googlename) - return (project,) + os.path.splitext(rest) - - def BaseName(self): - """File base name - text after the final slash, before the final period.""" - return self.Split()[1] - - def Extension(self): - """File extension - text following the final period, includes that period.""" - return self.Split()[2] - - def NoExtension(self): - """File has no source file extension.""" - return "/".join(self.Split()[0:2]) - - def IsSource(self): - """File has a source file extension.""" - return _IsSourceExtension(self.Extension()[1:]) - - -def _ShouldPrintError(category, confidence, linenum): - """If confidence >= verbose, category passes filter and is not suppressed.""" - - # There are three ways we might decide not to print an error message: - # a "NOLINT(category)" comment appears in the source, - # the verbosity level isn't high enough, or the filters filter it out. - if IsErrorSuppressedByNolint(category, linenum): - return False + googlename = self.RepositoryName() + project, rest = os.path.split(googlename) + return (project,) + os.path.splitext(rest) - if confidence < _cpplint_state.verbose_level: - return False + def BaseName(self): + """File base name - text after the final slash, before the final period.""" + return self.Split()[1] - is_filtered = False - for one_filter in _Filters(): - if one_filter.startswith("-"): - if category.startswith(one_filter[1:]): - is_filtered = True - elif one_filter.startswith("+"): - if category.startswith(one_filter[1:]): - is_filtered = False - else: - assert False # should have been checked for in SetFilter. 
- if is_filtered: - return False + def Extension(self): + """File extension - text following the final period, includes that period.""" + return self.Split()[2] - return True + def NoExtension(self): + """File has no source file extension.""" + return '/'.join(self.Split()[0:2]) + def IsSource(self): + """File has a source file extension.""" + return _IsSourceExtension(self.Extension()[1:]) -def Error(filename, linenum, category, confidence, message): - """Logs the fact we've found a lint error. - We log where the error was found, and also our confidence in the error, - that is, how certain we are this is a legitimate style regression, and - not a misidentification or a use that's sometimes justified. +def _ShouldPrintError(category, confidence, filename, linenum): + """If confidence >= verbose, category passes filter and is not suppressed.""" - False positives can be suppressed by the use of - "cpplint(category)" comments on the offending line. These are - parsed into _error_suppressions. + # There are three ways we might decide not to print an error message: + # a "NOLINT(category)" comment appears in the source, + # the verbosity level isn't high enough, or the filters filter it out. + if IsErrorSuppressedByNolint(category, linenum): + return False + + if confidence < _cpplint_state.verbose_level: + return False + + is_filtered = False + for one_filter in _Filters(): + filter_cat, filter_file, filter_line = _ParseFilterSelector(one_filter[1:]) + category_match = category.startswith(filter_cat) + file_match = filter_file == "" or filter_file == filename + line_match = filter_line == linenum or filter_line == -1 + + if one_filter.startswith('-'): + if category_match and file_match and line_match: + is_filtered = True + elif one_filter.startswith('+'): + if category_match and file_match and line_match: + is_filtered = False + else: + assert False # should have been checked for in SetFilter. 
+ if is_filtered: + return False + + return True - Args: - filename: The name of the file containing the error. - linenum: The number of the line containing the error. - category: A string used to describe the "category" this bug - falls under: "whitespace", say, or "runtime". Categories - may have a hierarchy separated by slashes: "whitespace/indent". - confidence: A number from 1-5 representing a confidence score for - the error, with 5 meaning that we are certain of the problem, - and 1 meaning that it could be a legitimate construct. - message: The error message. - """ - if _ShouldPrintError(category, confidence, linenum): - _cpplint_state.IncrementErrorCount(category) - if _cpplint_state.output_format == "vs7": - _cpplint_state.PrintError( - "%s(%s): error cpplint: [%s] %s [%d]\n" - % (filename, linenum, category, message, confidence) - ) - elif _cpplint_state.output_format == "eclipse": - sys.stderr.write( - "%s:%s: warning: %s [%s] [%d]\n" - % (filename, linenum, message, category, confidence) - ) - elif _cpplint_state.output_format == "junit": - _cpplint_state.AddJUnitFailure( - filename, linenum, message, category, confidence - ) - else: - final_message = "%s:%s: %s [%s] [%d]\n" % ( - filename, - linenum, - message, - category, - confidence, - ) - sys.stderr.write(final_message) + +def Error(filename, linenum, category, confidence, message): + """Logs the fact we've found a lint error. + + We log where the error was found, and also our confidence in the error, + that is, how certain we are this is a legitimate style regression, and + not a misidentification or a use that's sometimes justified. + + False positives can be suppressed by the use of "NOLINT(category)" + comments, NOLINTNEXTLINE or in blocks started by NOLINTBEGIN. These + are parsed into _error_suppressions. + + Args: + filename: The name of the file containing the error. + linenum: The number of the line containing the error. 
+ category: A string used to describe the "category" this bug + falls under: "whitespace", say, or "runtime". Categories + may have a hierarchy separated by slashes: "whitespace/indent". + confidence: A number from 1-5 representing a confidence score for + the error, with 5 meaning that we are certain of the problem, + and 1 meaning that it could be a legitimate construct. + message: The error message. + """ + if _ShouldPrintError(category, confidence, filename, linenum): + _cpplint_state.IncrementErrorCount(category) + if _cpplint_state.output_format == 'vs7': + _cpplint_state.PrintError(f'{filename}({linenum}): error cpplint:' + f' [{category}] {message} [{confidence}]\n') + elif _cpplint_state.output_format == 'eclipse': + sys.stderr.write(f'{filename}:{linenum}: warning:' + f' {message} [{category}] [{confidence}]\n') + elif _cpplint_state.output_format == 'junit': + _cpplint_state.AddJUnitFailure(filename, linenum, message, category, confidence) + elif _cpplint_state.output_format in ['sed', 'gsed']: + if message in _SED_FIXUPS: + sys.stdout.write(f"{_cpplint_state.output_format} -i" + f" '{linenum}{_SED_FIXUPS[message]}' {filename}" + f" # {message} [{category}] [{confidence}]\n") + else: + sys.stderr.write(f'# {filename}:{linenum}: ' + f' "{message}" [{category}] [{confidence}]\n') + else: + final_message = (f'{filename}:{linenum}: ' + f' {message} [{category}] [{confidence}]\n') + sys.stderr.write(final_message) # Matches standard C++ escape sequences per 2.13.2.3 of the C++ standard. -_RE_PATTERN_CLEANSE_LINE_ESCAPES = re.compile(r'\\([abfnrtv?"\\\']|\d+|x[0-9a-fA-F]+)') +_RE_PATTERN_CLEANSE_LINE_ESCAPES = re.compile( + r'\\([abfnrtv?"\\\']|\d+|x[0-9a-fA-F]+)') # Match a single C style comment on the same line. -_RE_PATTERN_C_COMMENTS = r"/\*(?:[^*]|\*(?!/))*\*/" +_RE_PATTERN_C_COMMENTS = r'/\*(?:[^*]|\*(?!/))*\*/' # Matches multi-line C style comments. 
# This RE is a little bit more complicated than one might expect, because we # have to take care of space removals tools so we can handle comments inside @@ -1555,923 +1836,866 @@ def Error(filename, linenum, category, confidence, message): # if this doesn't work we try on left side but only if there's a non-character # on the right. _RE_PATTERN_CLEANSE_LINE_C_COMMENTS = re.compile( - r"(\s*" - + _RE_PATTERN_C_COMMENTS - + r"\s*$|" - + _RE_PATTERN_C_COMMENTS - + r"\s+|" - + r"\s+" - + _RE_PATTERN_C_COMMENTS - + r"(?=\W)|" - + _RE_PATTERN_C_COMMENTS - + r")" -) + r'(\s*' + _RE_PATTERN_C_COMMENTS + r'\s*$|' + + _RE_PATTERN_C_COMMENTS + r'\s+|' + + r'\s+' + _RE_PATTERN_C_COMMENTS + r'(?=\W)|' + + _RE_PATTERN_C_COMMENTS + r')') def IsCppString(line): - """Does line terminate so, that the next symbol is in string constant. + """Does line terminate so, that the next symbol is in string constant. - This function does not consider single-line nor multi-line comments. + This function does not consider single-line nor multi-line comments. - Args: - line: is a partial line of code starting from the 0..n. + Args: + line: is a partial line of code starting from the 0..n. - Returns: - True, if next character appended to 'line' is inside a - string constant. - """ + Returns: + True, if next character appended to 'line' is inside a + string constant. + """ - line = line.replace(r"\\", "XX") # after this, \\" does not match to \" - return ((line.count('"') - line.count(r"\"") - line.count("'\"'")) & 1) == 1 + line = line.replace(r'\\', 'XX') # after this, \\" does not match to \" + return ((line.count('"') - line.count(r'\"') - line.count("'\"'")) & 1) == 1 def CleanseRawStrings(raw_lines): - """Removes C++11 raw strings from lines. - - Before: - static const char kData[] = R"( - multi-line string - )"; - - After: - static const char kData[] = "" - (replaced by blank line) - ""; - - Args: - raw_lines: list of raw lines. + """Removes C++11 raw strings from lines. 
+ + Before: + static const char kData[] = R"( + multi-line string + )"; + + After: + static const char kData[] = "" + (replaced by blank line) + ""; + + Args: + raw_lines: list of raw lines. + + Returns: + list of lines with C++11 raw strings replaced by empty strings. + """ + + delimiter = None + lines_without_raw_strings = [] + for line in raw_lines: + if delimiter: + # Inside a raw string, look for the end + end = line.find(delimiter) + if end >= 0: + # Found the end of the string, match leading space for this + # line and resume copying the original lines, and also insert + # a "" on the last line. + leading_space = re.match(r'^(\s*)\S', line) + line = leading_space.group(1) + '""' + line[end + len(delimiter):] + delimiter = None + else: + # Haven't found the end yet, append a blank line. + line = '""' + + # Look for beginning of a raw string, and replace them with + # empty strings. This is done in a loop to handle multiple raw + # strings on the same line. + while delimiter is None: + # Look for beginning of a raw string. + # See 2.14.15 [lex.string] for syntax. + # + # Once we have matched a raw string, we check the prefix of the + # line to make sure that the line is not part of a single line + # comment. It's done this way because we remove raw strings + # before removing comments as opposed to removing comments + # before removing raw strings. This is because there are some + # cpplint checks that requires the comments to be preserved, but + # we don't want to check comments that are inside raw strings. 
+ matched = re.match(r'^(.*?)\b(?:R|u8R|uR|UR|LR)"([^\s\\()]*)\((.*)$', line) + if (matched and + not re.match(r'^([^\'"]|\'(\\.|[^\'])*\'|"(\\.|[^"])*")*//', + matched.group(1))): + delimiter = ')' + matched.group(2) + '"' + + end = matched.group(3).find(delimiter) + if end >= 0: + # Raw string ended on same line + line = (matched.group(1) + '""' + + matched.group(3)[end + len(delimiter):]) + delimiter = None + else: + # Start of a multi-line raw string + line = matched.group(1) + '""' + else: + break - Returns: - list of lines with C++11 raw strings replaced by empty strings. - """ + lines_without_raw_strings.append(line) - delimiter = None - lines_without_raw_strings = [] - for line in raw_lines: - if delimiter: - # Inside a raw string, look for the end - end = line.find(delimiter) - if end >= 0: - # Found the end of the string, match leading space for this - # line and resume copying the original lines, and also insert - # a "" on the last line. - leading_space = Match(r"^(\s*)\S", line) - line = leading_space.group(1) + '""' + line[end + len(delimiter) :] - delimiter = None - else: - # Haven't found the end yet, append a blank line. - line = '""' - - # Look for beginning of a raw string, and replace them with - # empty strings. This is done in a loop to handle multiple raw - # strings on the same line. - while delimiter is None: - # Look for beginning of a raw string. - # See 2.14.15 [lex.string] for syntax. - # - # Once we have matched a raw string, we check the prefix of the - # line to make sure that the line is not part of a single line - # comment. It's done this way because we remove raw strings - # before removing comments as opposed to removing comments - # before removing raw strings. This is because there are some - # cpplint checks that requires the comments to be preserved, but - # we don't want to check comments that are inside raw strings. 
- matched = Match(r'^(.*?)\b(?:R|u8R|uR|UR|LR)"([^\s\\()]*)\((.*)$', line) - if matched and not Match( - r'^([^\'"]|\'(\\.|[^\'])*\'|"(\\.|[^"])*")*//', matched.group(1) - ): - delimiter = ")" + matched.group(2) + '"' - - end = matched.group(3).find(delimiter) - if end >= 0: - # Raw string ended on same line - line = ( - matched.group(1) - + '""' - + matched.group(3)[end + len(delimiter) :] - ) - delimiter = None - else: - # Start of a multi-line raw string - line = matched.group(1) + '""' - else: - break - - lines_without_raw_strings.append(line) - - # TODO(unknown): if delimiter is not None here, we might want to - # emit a warning for unterminated string. - return lines_without_raw_strings + # TODO(unknown): if delimiter is not None here, we might want to + # emit a warning for unterminated string. + return lines_without_raw_strings def FindNextMultiLineCommentStart(lines, lineix): - """Find the beginning marker for a multiline comment.""" - while lineix < len(lines): - if lines[lineix].strip().startswith("/*"): - # Only return this marker if the comment goes beyond this line - if lines[lineix].strip().find("*/", 2) < 0: - return lineix - lineix += 1 - return len(lines) + """Find the beginning marker for a multiline comment.""" + while lineix < len(lines): + if lines[lineix].strip().startswith('/*'): + # Only return this marker if the comment goes beyond this line + if lines[lineix].strip().find('*/', 2) < 0: + return lineix + lineix += 1 + return len(lines) def FindNextMultiLineCommentEnd(lines, lineix): - """We are inside a comment, find the end marker.""" - while lineix < len(lines): - if lines[lineix].strip().endswith("*/"): - return lineix - lineix += 1 - return len(lines) + """We are inside a comment, find the end marker.""" + while lineix < len(lines): + if lines[lineix].strip().endswith('*/'): + return lineix + lineix += 1 + return len(lines) def RemoveMultiLineCommentsFromRange(lines, begin, end): - """Clears a range of lines for multi-line comments.""" 
- # Having // dummy comments makes the lines non-empty, so we will not get - # unnecessary blank line warnings later in the code. - for i in range(begin, end): - lines[i] = "/**/" + """Clears a range of lines for multi-line comments.""" + # Having // comments makes the lines non-empty, so we will not get + # unnecessary blank line warnings later in the code. + for i in range(begin, end): + lines[i] = '/**/' def RemoveMultiLineComments(filename, lines, error): - """Removes multiline (c-style) comments from lines.""" - lineix = 0 - while lineix < len(lines): - lineix_begin = FindNextMultiLineCommentStart(lines, lineix) - if lineix_begin >= len(lines): - return - lineix_end = FindNextMultiLineCommentEnd(lines, lineix_begin) - if lineix_end >= len(lines): - error( - filename, - lineix_begin + 1, - "readability/multiline_comment", - 5, - "Could not find end of multi-line comment", - ) - return - RemoveMultiLineCommentsFromRange(lines, lineix_begin, lineix_end + 1) - lineix = lineix_end + 1 + """Removes multiline (c-style) comments from lines.""" + lineix = 0 + while lineix < len(lines): + lineix_begin = FindNextMultiLineCommentStart(lines, lineix) + if lineix_begin >= len(lines): + return + lineix_end = FindNextMultiLineCommentEnd(lines, lineix_begin) + if lineix_end >= len(lines): + error(filename, lineix_begin + 1, 'readability/multiline_comment', 5, + 'Could not find end of multi-line comment') + return + RemoveMultiLineCommentsFromRange(lines, lineix_begin, lineix_end + 1) + lineix = lineix_end + 1 def CleanseComments(line): - """Removes //-comments and single-line C-style /* */ comments. + """Removes //-comments and single-line C-style /* */ comments. - Args: - line: A line of C++ source. + Args: + line: A line of C++ source. - Returns: - The line with single-line comments removed. - """ - commentpos = line.find("//") - if commentpos != -1 and not IsCppString(line[:commentpos]): - line = line[:commentpos].rstrip() - # get rid of /* ... 
*/ - return _RE_PATTERN_CLEANSE_LINE_C_COMMENTS.sub("", line) + Returns: + The line with single-line comments removed. + """ + commentpos = line.find('//') + if commentpos != -1 and not IsCppString(line[:commentpos]): + line = line[:commentpos].rstrip() + # get rid of /* ... */ + return _RE_PATTERN_CLEANSE_LINE_C_COMMENTS.sub('', line) -class CleansedLines(object): - """Holds 4 copies of all lines with different preprocessing applied to them. - - 1) elided member contains lines without strings and comments. - 2) lines member contains lines without comments. - 3) raw_lines member contains all the lines without processing. - 4) lines_without_raw_strings member is same as raw_lines, but with C++11 raw - strings removed. - All these members are of , and of the same length. - """ +def ReplaceAlternateTokens(line): + """Replace any alternate token by its original counterpart. - def __init__(self, lines): - self.elided = [] - self.lines = [] - self.raw_lines = lines - self.num_lines = len(lines) - self.lines_without_raw_strings = CleanseRawStrings(lines) - for linenum in range(len(self.lines_without_raw_strings)): - self.lines.append(CleanseComments(self.lines_without_raw_strings[linenum])) - elided = self._CollapseStrings(self.lines_without_raw_strings[linenum]) - self.elided.append(CleanseComments(elided)) - - def NumLines(self): - """Returns the number of lines represented.""" - return self.num_lines - - @staticmethod - def _CollapseStrings(elided): - """Collapses strings and chars on a line to simple "" or '' blocks. - - We nix strings first so we're not fooled by text like '"http://"' - - Args: - elided: The line being processed. - - Returns: - The line with collapsed strings. - """ - if _RE_PATTERN_INCLUDE.match(elided): - return elided - - # Remove escaped characters first to make quote/single quote collapsing - # basic. Things that look like escaped characters shouldn't occur - # outside of strings and chars. 
- elided = _RE_PATTERN_CLEANSE_LINE_ESCAPES.sub("", elided) - - # Replace quoted strings and digit separators. Both single quotes - # and double quotes are processed in the same loop, otherwise - # nested quotes wouldn't work. - collapsed = "" - while True: - # Find the first quote character - match = Match(r'^([^\'"]*)([\'"])(.*)$', elided) - if not match: - collapsed += elided - break - head, quote, tail = match.groups() - - if quote == '"': - # Collapse double quoted strings - second_quote = tail.find('"') - if second_quote >= 0: - collapsed += head + '""' - elided = tail[second_quote + 1 :] - else: - # Unmatched double quote, don't bother processing the rest - # of the line since this is probably a multiline string. - collapsed += elided - break - else: - # Found single quote, check nearby text to eliminate digit separators. - # - # There is no special handling for floating point here, because - # the integer/fractional/exponent parts would all be parsed - # correctly as long as there are digits on both sides of the - # separator. So we are fine as long as we don't see something - # like "0.'3" (gcc 4.9.0 will not allow this literal). - if Search(r"\b(?:0[bBxX]?|[1-9])[0-9a-fA-F]*$", head): - match_literal = Match(r"^((?:\'?[0-9a-zA-Z_])*)(.*)$", "'" + tail) - collapsed += head + match_literal.group(1).replace("'", "") - elided = match_literal.group(2) - else: - second_quote = tail.find("'") - if second_quote >= 0: - collapsed += head + "''" - elided = tail[second_quote + 1 :] - else: - # Unmatched single quote - collapsed += elided - break - - return collapsed + In order to comply with the google rule stating that unary operators should + never be followed by a space, an exception is made for the 'not' and 'compl' + alternate tokens. For these, any trailing space is removed during the + conversion. + Args: + line: The line being processed. + + Returns: + The line with alternate tokens replaced. 
+ """ + for match in _ALT_TOKEN_REPLACEMENT_PATTERN.finditer(line): + token = _ALT_TOKEN_REPLACEMENT[match.group(2)] + tail = '' if match.group(2) in ['not', 'compl'] and match.group(3) == ' ' \ + else r'\3' + line = re.sub(match.re, rf'\1{token}{tail}', line, count=1) + return line -def FindEndOfExpressionInLine(line, startpos, stack): - """Find the position just after the end of current parenthesized expression. + +class CleansedLines(object): + """Holds 4 copies of all lines with different preprocessing applied to them. + + 1) elided member contains lines without strings and comments. + 2) lines member contains lines without comments. + 3) raw_lines member contains all the lines without processing. + 4) lines_without_raw_strings member is same as raw_lines, but with C++11 raw + strings removed. + All these members are of , and of the same length. + """ + + def __init__(self, lines): + if '-readability/alt_tokens' in _cpplint_state.filters: + for i, line in enumerate(lines): + lines[i] = ReplaceAlternateTokens(line) + self.elided = [] + self.lines = [] + self.raw_lines = lines + self.num_lines = len(lines) + self.lines_without_raw_strings = CleanseRawStrings(lines) + for line in self.lines_without_raw_strings: + self.lines.append(CleanseComments(line)) + elided = self._CollapseStrings(line) + self.elided.append(CleanseComments(elided)) + + def NumLines(self): + """Returns the number of lines represented.""" + return self.num_lines + + @staticmethod + def _CollapseStrings(elided): + """Collapses strings and chars on a line to simple "" or '' blocks. + + We nix strings first so we're not fooled by text like '"http://"' Args: - line: a CleansedLines line. - startpos: start searching at this position. - stack: nesting stack at startpos. + elided: The line being processed. 
Returns: - On finding matching end: (index just after matching end, None) - On finding an unclosed expression: (-1, None) - Otherwise: (-1, new stack at end of this line) + The line with collapsed strings. """ - for i in xrange(startpos, len(line)): - char = line[i] - if char in "([{": - # Found start of parenthesized expression, push to expression stack - stack.append(char) - elif char == "<": - # Found potential start of template argument list - if i > 0 and line[i - 1] == "<": - # Left shift operator - if stack and stack[-1] == "<": - stack.pop() - if not stack: - return (-1, None) - elif i > 0 and Search(r"\boperator\s*$", line[0:i]): - # operator<, don't add to stack - continue - else: - # Tentative start of template argument list - stack.append("<") - elif char in ")]}": - # Found end of parenthesized expression. - # - # If we are currently expecting a matching '>', the pending '<' - # must have been an operator. Remove them from expression stack. - while stack and stack[-1] == "<": - stack.pop() - if not stack: - return (-1, None) - if ( - (stack[-1] == "(" and char == ")") - or (stack[-1] == "[" and char == "]") - or (stack[-1] == "{" and char == "}") - ): - stack.pop() - if not stack: - return (i + 1, None) - else: - # Mismatched parentheses - return (-1, None) - elif char == ">": - # Found potential end of template argument list. - - # Ignore "->" and operator functions - if i > 0 and ( - line[i - 1] == "-" or Search(r"\boperator\s*$", line[0 : i - 1]) - ): - continue - - # Pop the stack if there is a matching '<'. Otherwise, ignore - # this '>' since it must be an operator. - if stack: - if stack[-1] == "<": - stack.pop() - if not stack: - return (i + 1, None) - elif char == ";": - # Found something that look like end of statements. If we are currently - # expecting a '>', the matching '<' must have been an operator, since - # template argument list should not contain statements. 
- while stack and stack[-1] == "<": - stack.pop() - if not stack: - return (-1, None) - - # Did not find end of expression or unbalanced parentheses on this line - return (-1, stack) + if _RE_PATTERN_INCLUDE.match(elided): + return elided + + # Remove escaped characters first to make quote/single quote collapsing + # basic. Things that look like escaped characters shouldn't occur + # outside of strings and chars. + elided = _RE_PATTERN_CLEANSE_LINE_ESCAPES.sub('', elided) + + # Replace quoted strings and digit separators. Both single quotes + # and double quotes are processed in the same loop, otherwise + # nested quotes wouldn't work. + collapsed = '' + while True: + # Find the first quote character + match = re.match(r'^([^\'"]*)([\'"])(.*)$', elided) + if not match: + collapsed += elided + break + head, quote, tail = match.groups() + + if quote == '"': + # Collapse double quoted strings + second_quote = tail.find('"') + if second_quote >= 0: + collapsed += head + '""' + elided = tail[second_quote + 1:] + else: + # Unmatched double quote, don't bother processing the rest + # of the line since this is probably a multiline string. + collapsed += elided + break + else: + # Found single quote, check nearby text to eliminate digit separators. + # + # There is no special handling for floating point here, because + # the integer/fractional/exponent parts would all be parsed + # correctly as long as there are digits on both sides of the + # separator. So we are fine as long as we don't see something + # like "0.'3" (gcc 4.9.0 will not allow this literal). 
+ if re.search(r'\b(?:0[bBxX]?|[1-9])[0-9a-fA-F]*$', head): + match_literal = re.match(r'^((?:\'?[0-9a-zA-Z_])*)(.*)$', "'" + tail) + collapsed += head + match_literal.group(1).replace("'", '') + elided = match_literal.group(2) + else: + second_quote = tail.find('\'') + if second_quote >= 0: + collapsed += head + "''" + elided = tail[second_quote + 1:] + else: + # Unmatched single quote + collapsed += elided + break + return collapsed -def CloseExpression(clean_lines, linenum, pos): - """If input points to ( or { or [ or <, finds the position that closes it. - If lines[linenum][pos] points to a '(' or '{' or '[' or '<', finds the - linenum/pos that correspond to the closing of the expression. +def FindEndOfExpressionInLine(line, startpos, stack): + """Find the position just after the end of current parenthesized expression. + + Args: + line: a CleansedLines line. + startpos: start searching at this position. + stack: nesting stack at startpos. + + Returns: + On finding matching end: (index just after matching end, None) + On finding an unclosed expression: (-1, None) + Otherwise: (-1, new stack at end of this line) + """ + for i in range(startpos, len(line)): + char = line[i] + if char in '([{': + # Found start of parenthesized expression, push to expression stack + stack.append(char) + elif char == '<': + # Found potential start of template argument list + if i > 0 and line[i - 1] == '<': + # Left shift operator + if stack and stack[-1] == '<': + stack.pop() + if not stack: + return (-1, None) + elif i > 0 and re.search(r'\boperator\s*$', line[0:i]): + # operator<, don't add to stack + continue + else: + # Tentative start of template argument list + stack.append('<') + elif char in ')]}': + # Found end of parenthesized expression. + # + # If we are currently expecting a matching '>', the pending '<' + # must have been an operator. Remove them from expression stack. 
+ while stack and stack[-1] == '<': + stack.pop() + if not stack: + return (-1, None) + if ((stack[-1] == '(' and char == ')') or + (stack[-1] == '[' and char == ']') or + (stack[-1] == '{' and char == '}')): + stack.pop() + if not stack: + return (i + 1, None) + else: + # Mismatched parentheses + return (-1, None) + elif char == '>': + # Found potential end of template argument list. + + # Ignore "->" and operator functions + if (i > 0 and + (line[i - 1] == '-' or re.search(r'\boperator\s*$', line[0:i - 1]))): + continue + + # Pop the stack if there is a matching '<'. Otherwise, ignore + # this '>' since it must be an operator. + if stack: + if stack[-1] == '<': + stack.pop() + if not stack: + return (i + 1, None) + elif char == ';': + # Found something that look like end of statements. If we are currently + # expecting a '>', the matching '<' must have been an operator, since + # template argument list should not contain statements. + while stack and stack[-1] == '<': + stack.pop() + if not stack: + return (-1, None) + + # Did not find end of expression or unbalanced parentheses on this line + return (-1, stack) - TODO(unknown): cpplint spends a fair bit of time matching parentheses. - Ideally we would want to index all opening and closing parentheses once - and have CloseExpression be just a simple lookup, but due to preprocessor - tricks, this is not so easy. - Args: - clean_lines: A CleansedLines instance containing the file. - linenum: The number of the line to check. - pos: A position on the line. +def CloseExpression(clean_lines, linenum, pos): + """If input points to ( or { or [ or <, finds the position that closes it. + + If lines[linenum][pos] points to a '(' or '{' or '[' or '<', finds the + linenum/pos that correspond to the closing of the expression. + + TODO(unknown): cpplint spends a fair bit of time matching parentheses. 
+ Ideally we would want to index all opening and closing parentheses once + and have CloseExpression be just a simple lookup, but due to preprocessor + tricks, this is not so easy. + + Args: + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + pos: A position on the line. + + Returns: + A tuple (line, linenum, pos) pointer *past* the closing brace, or + (line, len(lines), -1) if we never find a close. Note we ignore + strings and comments when matching; and the line we return is the + 'cleansed' line at linenum. + """ + + line = clean_lines.elided[linenum] + if (line[pos] not in '({[<') or re.match(r'<[<=]', line[pos:]): + return (line, clean_lines.NumLines(), -1) - Returns: - A tuple (line, linenum, pos) pointer *past* the closing brace, or - (line, len(lines), -1) if we never find a close. Note we ignore - strings and comments when matching; and the line we return is the - 'cleansed' line at linenum. - """ + # Check first line + (end_pos, stack) = FindEndOfExpressionInLine(line, pos, []) + if end_pos > -1: + return (line, linenum, end_pos) + # Continue scanning forward + while stack and linenum < clean_lines.NumLines() - 1: + linenum += 1 line = clean_lines.elided[linenum] - if (line[pos] not in "({[<") or Match(r"<[<=]", line[pos:]): - return (line, clean_lines.NumLines(), -1) - - # Check first line - (end_pos, stack) = FindEndOfExpressionInLine(line, pos, []) + (end_pos, stack) = FindEndOfExpressionInLine(line, 0, stack) if end_pos > -1: - return (line, linenum, end_pos) - - # Continue scanning forward - while stack and linenum < clean_lines.NumLines() - 1: - linenum += 1 - line = clean_lines.elided[linenum] - (end_pos, stack) = FindEndOfExpressionInLine(line, 0, stack) - if end_pos > -1: - return (line, linenum, end_pos) + return (line, linenum, end_pos) - # Did not find end of expression before end of file, give up - return (line, clean_lines.NumLines(), -1) + # Did not find end of expression before end of 
file, give up + return (line, clean_lines.NumLines(), -1) def FindStartOfExpressionInLine(line, endpos, stack): - """Find position at the matching start of current expression. - - This is almost the reverse of FindEndOfExpressionInLine, but note - that the input position and returned position differs by 1. - - Args: - line: a CleansedLines line. - endpos: start searching at this position. - stack: nesting stack at endpos. - - Returns: - On finding matching start: (index at matching start, None) - On finding an unclosed expression: (-1, None) - Otherwise: (-1, new stack at beginning of this line) - """ - i = endpos - while i >= 0: - char = line[i] - if char in ")]}": - # Found end of expression, push to expression stack - stack.append(char) - elif char == ">": - # Found potential end of template argument list. - # - # Ignore it if it's a "->" or ">=" or "operator>" - if i > 0 and ( - line[i - 1] == "-" - or Match(r"\s>=\s", line[i - 1 :]) - or Search(r"\boperator\s*$", line[0:i]) - ): - i -= 1 - else: - stack.append(">") - elif char == "<": - # Found potential start of template argument list - if i > 0 and line[i - 1] == "<": - # Left shift operator - i -= 1 - else: - # If there is a matching '>', we can pop the expression stack. - # Otherwise, ignore this '<' since it must be an operator. - if stack and stack[-1] == ">": - stack.pop() - if not stack: - return (i, None) - elif char in "([{": - # Found start of expression. - # - # If there are any unmatched '>' on the stack, they must be - # operators. Remove those. - while stack and stack[-1] == ">": - stack.pop() - if not stack: - return (-1, None) - if ( - (char == "(" and stack[-1] == ")") - or (char == "[" and stack[-1] == "]") - or (char == "{" and stack[-1] == "}") - ): - stack.pop() - if not stack: - return (i, None) - else: - # Mismatched parentheses - return (-1, None) - elif char == ";": - # Found something that look like end of statements. 
If we are currently - # expecting a '<', the matching '>' must have been an operator, since - # template argument list should not contain statements. - while stack and stack[-1] == ">": - stack.pop() - if not stack: - return (-1, None) - + """Find position at the matching start of current expression. + + This is almost the reverse of FindEndOfExpressionInLine, but note + that the input position and returned position differs by 1. + + Args: + line: a CleansedLines line. + endpos: start searching at this position. + stack: nesting stack at endpos. + + Returns: + On finding matching start: (index at matching start, None) + On finding an unclosed expression: (-1, None) + Otherwise: (-1, new stack at beginning of this line) + """ + i = endpos + while i >= 0: + char = line[i] + if char in ')]}': + # Found end of expression, push to expression stack + stack.append(char) + elif char == '>': + # Found potential end of template argument list. + # + # Ignore it if it's a "->" or ">=" or "operator>" + if (i > 0 and + (line[i - 1] == '-' or + re.match(r'\s>=\s', line[i - 1:]) or + re.search(r'\boperator\s*$', line[0:i]))): i -= 1 - - return (-1, stack) + else: + stack.append('>') + elif char == '<': + # Found potential start of template argument list + if i > 0 and line[i - 1] == '<': + # Left shift operator + i -= 1 + else: + # If there is a matching '>', we can pop the expression stack. + # Otherwise, ignore this '<' since it must be an operator. + if stack and stack[-1] == '>': + stack.pop() + if not stack: + return (i, None) + elif char in '([{': + # Found start of expression. + # + # If there are any unmatched '>' on the stack, they must be + # operators. Remove those. 
+ while stack and stack[-1] == '>': + stack.pop() + if not stack: + return (-1, None) + if ((char == '(' and stack[-1] == ')') or + (char == '[' and stack[-1] == ']') or + (char == '{' and stack[-1] == '}')): + stack.pop() + if not stack: + return (i, None) + else: + # Mismatched parentheses + return (-1, None) + elif char == ';': + # Found something that look like end of statements. If we are currently + # expecting a '<', the matching '>' must have been an operator, since + # template argument list should not contain statements. + while stack and stack[-1] == '>': + stack.pop() + if not stack: + return (-1, None) + + i -= 1 + + return (-1, stack) def ReverseCloseExpression(clean_lines, linenum, pos): - """If input points to ) or } or ] or >, finds the position that opens it. - - If lines[linenum][pos] points to a ')' or '}' or ']' or '>', finds the - linenum/pos that correspond to the opening of the expression. + """If input points to ) or } or ] or >, finds the position that opens it. + + If lines[linenum][pos] points to a ')' or '}' or ']' or '>', finds the + linenum/pos that correspond to the opening of the expression. + + Args: + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + pos: A position on the line. + + Returns: + A tuple (line, linenum, pos) pointer *at* the opening brace, or + (line, 0, -1) if we never find the matching opening brace. Note + we ignore strings and comments when matching; and the line we + return is the 'cleansed' line at linenum. + """ + line = clean_lines.elided[linenum] + if line[pos] not in ')}]>': + return (line, 0, -1) - Args: - clean_lines: A CleansedLines instance containing the file. - linenum: The number of the line to check. - pos: A position on the line. 
+ # Check last line + (start_pos, stack) = FindStartOfExpressionInLine(line, pos, []) + if start_pos > -1: + return (line, linenum, start_pos) - Returns: - A tuple (line, linenum, pos) pointer *at* the opening brace, or - (line, 0, -1) if we never find the matching opening brace. Note - we ignore strings and comments when matching; and the line we - return is the 'cleansed' line at linenum. - """ + # Continue scanning backward + while stack and linenum > 0: + linenum -= 1 line = clean_lines.elided[linenum] - if line[pos] not in ")}]>": - return (line, 0, -1) - - # Check last line - (start_pos, stack) = FindStartOfExpressionInLine(line, pos, []) + (start_pos, stack) = FindStartOfExpressionInLine(line, len(line) - 1, stack) if start_pos > -1: - return (line, linenum, start_pos) - - # Continue scanning backward - while stack and linenum > 0: - linenum -= 1 - line = clean_lines.elided[linenum] - (start_pos, stack) = FindStartOfExpressionInLine(line, len(line) - 1, stack) - if start_pos > -1: - return (line, linenum, start_pos) + return (line, linenum, start_pos) - # Did not find start of expression before beginning of file, give up - return (line, 0, -1) + # Did not find start of expression before beginning of file, give up + return (line, 0, -1) def CheckForCopyright(filename, lines, error): - """Logs an error if no Copyright message appears at the top of the file.""" + """Logs an error if no Copyright message appears at the top of the file.""" - # We'll say it should occur by line 10. Don't forget there's a - # dummy line at the front. - for line in xrange(1, min(len(lines), 11)): - if re.search(r"Copyright", lines[line], re.I): - break - else: # means no copyright line was found - error( - filename, - 0, - "legal/copyright", - 5, - "No copyright message found. " - 'You should have a line: "Copyright [year] "', - ) + # We'll say it should occur by line 10. Don't forget there's a + # placeholder line at the front. 
+ for line in range(1, min(len(lines), 11)): + if re.search(r'Copyright', lines[line], re.I): break + else: # means no copyright line was found + error(filename, 0, 'legal/copyright', 5, + 'No copyright message found. ' + 'You should have a line: "Copyright [year] "') def GetIndentLevel(line): - """Return the number of leading spaces in line. + """Return the number of leading spaces in line. - Args: - line: A string to check. - - Returns: - An integer count of leading spaces, possibly zero. - """ - indent = Match(r"^( *)\S", line) - if indent: - return len(indent.group(1)) - else: - return 0 + Args: + line: A string to check. + Returns: + An integer count of leading spaces, possibly zero. + """ + indent = re.match(r'^( *)\S', line) + if indent: + return len(indent.group(1)) + else: + return 0 def PathSplitToList(path): - """Returns the path split into a list by the separator. + """Returns the path split into a list by the separator. + + Args: + path: An absolute or relative path (e.g. '/a/b/c/' or '../a') + + Returns: + A list of path components (e.g. ['a', 'b', 'c]). + """ + lst = [] + while True: + (head, tail) = os.path.split(path) + if head == path: # absolute paths end + lst.append(head) + break + if tail == path: # relative paths end + lst.append(tail) + break + + path = head + lst.append(tail) + + lst.reverse() + return lst - Args: - path: An absolute or relative path (e.g. '/a/b/c/' or '../a') +def GetHeaderGuardCPPVariable(filename): + """Returns the CPP variable that should be used as a header guard. - Returns: - A list of path components (e.g. ['a', 'b', 'c]). - """ - lst = [] - while True: - (head, tail) = os.path.split(path) - if head == path: # absolute paths end - lst.append(head) - break - if tail == path: # relative paths end - lst.append(tail) - break + Args: + filename: The name of a C++ header file. - path = head - lst.append(tail) + Returns: + The CPP variable that should be used as a header guard in the + named file. 
- lst.reverse() - return lst + """ + # Restores original filename in case that cpplint is invoked from Emacs's + # flymake. + filename = re.sub(r'_flymake\.h$', '.h', filename) + filename = re.sub(r'/\.flymake/([^/]*)$', r'/\1', filename) + # Replace 'c++' with 'cpp'. + filename = filename.replace('C++', 'cpp').replace('c++', 'cpp') -def GetHeaderGuardCPPVariable(filename): - """Returns the CPP variable that should be used as a header guard. + fileinfo = FileInfo(filename) + file_path_from_root = fileinfo.RepositoryName() - Args: - filename: The name of a C++ header file. + def FixupPathFromRoot(): + if _root_debug: + sys.stderr.write(f"\n_root fixup, _root = '{_root}'," + f" repository name = '{fileinfo.RepositoryName()}'\n") - Returns: - The CPP variable that should be used as a header guard in the - named file. + # Process the file path with the --root flag if it was set. + if not _root: + if _root_debug: + sys.stderr.write("_root unspecified\n") + return file_path_from_root - """ + def StripListPrefix(lst, prefix): + # f(['x', 'y'], ['w, z']) -> None (not a valid prefix) + if lst[:len(prefix)] != prefix: + return None + # f(['a, 'b', 'c', 'd'], ['a', 'b']) -> ['c', 'd'] + return lst[(len(prefix)):] - # Restores original filename in case that cpplint is invoked from Emacs's - # flymake. - filename = re.sub(r"_flymake\.h$", ".h", filename) - filename = re.sub(r"/\.flymake/([^/]*)$", r"/\1", filename) - # Replace 'c++' with 'cpp'. - filename = filename.replace("C++", "cpp").replace("c++", "cpp") - - fileinfo = FileInfo(filename) - file_path_from_root = fileinfo.RepositoryName() - - def FixupPathFromRoot(): - if _root_debug: - sys.stderr.write( - "\n_root fixup, _root = '%s', repository name = '%s'\n" - % (_root, fileinfo.RepositoryName()) - ) - - # Process the file path with the --root flag if it was set. 
- if not _root: - if _root_debug: - sys.stderr.write("_root unspecified\n") - return file_path_from_root - - def StripListPrefix(lst, prefix): - # f(['x', 'y'], ['w, z']) -> None (not a valid prefix) - if lst[: len(prefix)] != prefix: - return None - # f(['a, 'b', 'c', 'd'], ['a', 'b']) -> ['c', 'd'] - return lst[(len(prefix)) :] - - # root behavior: - # --root=subdir , lstrips subdir from the header guard - maybe_path = StripListPrefix( - PathSplitToList(file_path_from_root), PathSplitToList(_root) - ) - - if _root_debug: - sys.stderr.write( - ( - "_root lstrip (maybe_path=%s, file_path_from_root=%s," - + " _root=%s)\n" - ) - % (maybe_path, file_path_from_root, _root) - ) - - if maybe_path: - return os.path.join(*maybe_path) - - # --root=.. , will prepend the outer directory to the header guard - full_path = fileinfo.FullName() - root_abspath = os.path.abspath(_root) - - maybe_path = StripListPrefix( - PathSplitToList(full_path), PathSplitToList(root_abspath) - ) - - if _root_debug: - sys.stderr.write( - ("_root prepend (maybe_path=%s, full_path=%s, " + "root_abspath=%s)\n") - % (maybe_path, full_path, root_abspath) - ) - - if maybe_path: - return os.path.join(*maybe_path) - - if _root_debug: - sys.stderr.write("_root ignore, returning %s\n" % (file_path_from_root)) - - # --root=FAKE_DIR is ignored - return file_path_from_root - - file_path_from_root = FixupPathFromRoot() - return re.sub(r"[^a-zA-Z0-9]", "_", file_path_from_root).upper() + "_" + # root behavior: + # --root=subdir , lstrips subdir from the header guard + maybe_path = StripListPrefix(PathSplitToList(file_path_from_root), + PathSplitToList(_root)) + if _root_debug: + sys.stderr.write(("_root lstrip (maybe_path=%s, file_path_from_root=%s," + + " _root=%s)\n") % (maybe_path, file_path_from_root, _root)) -def CheckForHeaderGuard(filename, clean_lines, error): - """Checks that the file contains a header guard. 
+ if maybe_path: + return os.path.join(*maybe_path) - Logs an error if no #ifndef header guard is present. For other - headers, checks that the full pathname is used. + # --root=.. , will prepend the outer directory to the header guard + full_path = fileinfo.FullName() + # adapt slashes for windows + root_abspath = os.path.abspath(_root).replace('\\', '/') - Args: - filename: The name of the C++ header file. - clean_lines: A CleansedLines instance containing the file. - error: The function to call with any errors found. - """ + maybe_path = StripListPrefix(PathSplitToList(full_path), + PathSplitToList(root_abspath)) - # Don't check for header guards if there are error suppression - # comments somewhere in this file. - # - # Because this is silencing a warning for a nonexistent line, we - # only support the very specific NOLINT(build/header_guard) syntax, - # and not the general NOLINT or NOLINT(*) syntax. - raw_lines = clean_lines.lines_without_raw_strings - for i in raw_lines: - if Search(r"//\s*NOLINT\(build/header_guard\)", i): - return - - # Allow pragma once instead of header guards - for i in raw_lines: - if Search(r"^\s*#pragma\s+once", i): - return + if _root_debug: + sys.stderr.write(("_root prepend (maybe_path=%s, full_path=%s, " + + "root_abspath=%s)\n") % (maybe_path, full_path, root_abspath)) - cppvar = GetHeaderGuardCPPVariable(filename) + if maybe_path: + return os.path.join(*maybe_path) - ifndef = "" - ifndef_linenum = 0 - define = "" - endif = "" - endif_linenum = 0 - for linenum, line in enumerate(raw_lines): - linesplit = line.split() - if len(linesplit) >= 2: - # find the first occurrence of #ifndef and #define, save arg - if not ifndef and linesplit[0] == "#ifndef": - # set ifndef to the header guard presented on the #ifndef line. 
- ifndef = linesplit[1] - ifndef_linenum = linenum - if not define and linesplit[0] == "#define": - define = linesplit[1] - # find the last occurrence of #endif, save entire line - if line.startswith("#endif"): - endif = line - endif_linenum = linenum - - if not ifndef or not define or ifndef != define: - error( - filename, - 0, - "build/header_guard", - 5, - "No #ifndef header guard found, suggested CPP variable is: %s" % cppvar, - ) - return + if _root_debug: + sys.stderr.write(f"_root ignore, returning {file_path_from_root}\n") - # The guard should be PATH_FILE_H_, but we also allow PATH_FILE_H__ - # for backward compatibility. - if ifndef != cppvar: - error_level = 0 - if ifndef != cppvar + "_": - error_level = 5 - - ParseNolintSuppressions( - filename, raw_lines[ifndef_linenum], ifndef_linenum, error - ) - error( - filename, - ifndef_linenum, - "build/header_guard", - error_level, - "#ifndef header guard has wrong style, please use: %s" % cppvar, - ) - - # Check for "//" comments on endif line. - ParseNolintSuppressions(filename, raw_lines[endif_linenum], endif_linenum, error) - match = Match(r"#endif\s*//\s*" + cppvar + r"(_)?\b", endif) - if match: - if match.group(1) == "_": - # Issue low severity warning for deprecated double trailing underscore - error( - filename, - endif_linenum, - "build/header_guard", - 0, - '#endif line should be "#endif // %s"' % cppvar, - ) - return + # --root=FAKE_DIR is ignored + return file_path_from_root - # Didn't find the corresponding "//" comment. If this file does not - # contain any "//" comments at all, it could be that the compiler - # only wants "/**/" comments, look for those instead. 
- no_single_line_comments = True - for i in xrange(1, len(raw_lines) - 1): - line = raw_lines[i] - if Match(r'^(?:(?:\'(?:\.|[^\'])*\')|(?:"(?:\.|[^"])*")|[^\'"])*//', line): - no_single_line_comments = False - break + file_path_from_root = FixupPathFromRoot() + return re.sub(r'[^a-zA-Z0-9]', '_', file_path_from_root).upper() + '_' - if no_single_line_comments: - match = Match(r"#endif\s*/\*\s*" + cppvar + r"(_)?\s*\*/", endif) - if match: - if match.group(1) == "_": - # Low severity warning for double trailing underscore - error( - filename, - endif_linenum, - "build/header_guard", - 0, - '#endif line should be "#endif /* %s */"' % cppvar, - ) - return - - # Didn't find anything - error( - filename, - endif_linenum, - "build/header_guard", - 5, - '#endif line should be "#endif // %s"' % cppvar, - ) +def CheckForHeaderGuard(filename, clean_lines, error): + """Checks that the file contains a header guard. + + Logs an error if no #ifndef header guard is present. For other + headers, checks that the full pathname is used. + + Args: + filename: The name of the C++ header file. + clean_lines: A CleansedLines instance containing the file. + error: The function to call with any errors found. + """ + + # Don't check for header guards if there are error suppression + # comments somewhere in this file. + # + # Because this is silencing a warning for a nonexistent line, we + # only support the very specific NOLINT(build/header_guard) syntax, + # and not the general NOLINT or NOLINT(*) syntax. 
+ raw_lines = clean_lines.lines_without_raw_strings + for i in raw_lines: + if re.search(r'//\s*NOLINT\(build/header_guard\)', i): + return + + # Allow pragma once instead of header guards + for i in raw_lines: + if re.search(r'^\s*#pragma\s+once', i): + return + + cppvar = GetHeaderGuardCPPVariable(filename) + + ifndef = '' + ifndef_linenum = 0 + define = '' + endif = '' + endif_linenum = 0 + for linenum, line in enumerate(raw_lines): + linesplit = line.split() + if len(linesplit) >= 2: + # find the first occurrence of #ifndef and #define, save arg + if not ifndef and linesplit[0] == '#ifndef': + # set ifndef to the header guard presented on the #ifndef line. + ifndef = linesplit[1] + ifndef_linenum = linenum + if not define and linesplit[0] == '#define': + define = linesplit[1] + # find the last occurrence of #endif, save entire line + if line.startswith('#endif'): + endif = line + endif_linenum = linenum + + if not ifndef or not define or ifndef != define: + error(filename, 0, 'build/header_guard', 5, + f'No #ifndef header guard found, suggested CPP variable is: {cppvar}') + return + + # The guard should be PATH_FILE_H_, but we also allow PATH_FILE_H__ + # for backward compatibility. + if ifndef != cppvar: + error_level = 0 + if ifndef != cppvar + '_': + error_level = 5 + + ParseNolintSuppressions(filename, raw_lines[ifndef_linenum], ifndef_linenum, + error) + error(filename, ifndef_linenum, 'build/header_guard', error_level, + f'#ifndef header guard has wrong style, please use: {cppvar}') + + # Check for "//" comments on endif line. + ParseNolintSuppressions(filename, raw_lines[endif_linenum], endif_linenum, + error) + match = re.match(r'#endif\s*//\s*' + cppvar + r'(_)?\b', endif) + if match: + if match.group(1) == '_': + # Issue low severity warning for deprecated double trailing underscore + error(filename, endif_linenum, 'build/header_guard', 0, + f'#endif line should be "#endif // {cppvar}"') + return + + # Didn't find the corresponding "//" comment. 
If this file does not + # contain any "//" comments at all, it could be that the compiler + # only wants "/**/" comments, look for those instead. + no_single_line_comments = True + for i in range(1, len(raw_lines) - 1): + line = raw_lines[i] + if re.match(r'^(?:(?:\'(?:\.|[^\'])*\')|(?:"(?:\.|[^"])*")|[^\'"])*//', line): + no_single_line_comments = False + break + + if no_single_line_comments: + match = re.match(r'#endif\s*/\*\s*' + cppvar + r'(_)?\s*\*/', endif) + if match: + if match.group(1) == '_': + # Low severity warning for double trailing underscore + error(filename, endif_linenum, 'build/header_guard', 0, + f'#endif line should be "#endif /* {cppvar} */"') + return -def CheckHeaderFileIncluded(filename, include_state, error): - """Logs an error if a source file does not include its header.""" + # Didn't find anything + error(filename, endif_linenum, 'build/header_guard', 5, + f'#endif line should be "#endif // {cppvar}"') - # Do not check test files - fileinfo = FileInfo(filename) - if Search(_TEST_FILE_SUFFIX, fileinfo.BaseName()): - return - for ext in GetHeaderExtensions(): - basefilename = filename[0 : len(filename) - len(fileinfo.Extension())] - headerfile = basefilename + "." 
+ ext - if not os.path.exists(headerfile): - continue - headername = FileInfo(headerfile).RepositoryName() - first_include = None - for section_list in include_state.include_list: - for f in section_list: - if headername in f[0] or f[0] in headername: - return - if not first_include: - first_include = f[1] - - error( - filename, - first_include, - "build/include", - 5, - "%s should include its header file %s" - % (fileinfo.RepositoryName(), headername), - ) +def CheckHeaderFileIncluded(filename, include_state, error): + """Logs an error if a source file does not include its header.""" + + # Do not check test files + fileinfo = FileInfo(filename) + if re.search(_TEST_FILE_SUFFIX, fileinfo.BaseName()): + return + + first_include = message = None + basefilename = filename[0:len(filename) - len(fileinfo.Extension())] + for ext in GetHeaderExtensions(): + headerfile = basefilename + '.' + ext + if not os.path.exists(headerfile): + continue + headername = FileInfo(headerfile).RepositoryName() + include_uses_unix_dir_aliases = False + for section_list in include_state.include_list: + for f in section_list: + include_text = f[0] + if "./" in include_text: + include_uses_unix_dir_aliases = True + if headername in include_text or include_text in headername: + return + if not first_include: + first_include = f[1] + + message = f'{fileinfo.RepositoryName()} should include its header file {headername}' + if include_uses_unix_dir_aliases: + message += ". Relative paths like . and .. are not allowed." + + if message: + error(filename, first_include, 'build/include', 5, message) def CheckForBadCharacters(filename, lines, error): - """Logs an error for each line containing bad characters. + """Logs an error for each line containing bad characters. - Two kinds of bad characters: + Two kinds of bad characters: - 1. Unicode replacement characters: These indicate that either the file - contained invalid UTF-8 (likely) or Unicode replacement characters (which - it shouldn't). 
Note that it's possible for this to throw off line - numbering if the invalid UTF-8 occurred adjacent to a newline. + 1. Unicode replacement characters: These indicate that either the file + contained invalid UTF-8 (likely) or Unicode replacement characters (which + it shouldn't). Note that it's possible for this to throw off line + numbering if the invalid UTF-8 occurred adjacent to a newline. - 2. NUL bytes. These are problematic for some tools. + 2. NUL bytes. These are problematic for some tools. - Args: - filename: The name of the current file. - lines: An array of strings, each representing a line of the file. - error: The function to call with any errors found. - """ - for linenum, line in enumerate(lines): - if unicode_escape_decode("\ufffd") in line: - error( - filename, - linenum, - "readability/utf8", - 5, - "Line contains invalid UTF-8 (or Unicode replacement character).", - ) - if "\0" in line: - error(filename, linenum, "readability/nul", 5, "Line contains NUL byte.") + Args: + filename: The name of the current file. + lines: An array of strings, each representing a line of the file. + error: The function to call with any errors found. + """ + for linenum, line in enumerate(lines): + if '\ufffd' in line: + error(filename, linenum, 'readability/utf8', 5, + 'Line contains invalid UTF-8 (or Unicode replacement character).') + if '\0' in line: + error(filename, linenum, 'readability/nul', 5, 'Line contains NUL byte.') def CheckForNewlineAtEOF(filename, lines, error): - """Logs an error if there is no newline char at the end of the file. + """Logs an error if there is no newline char at the end of the file. - Args: - filename: The name of the current file. - lines: An array of strings, each representing a line of the file. - error: The function to call with any errors found. - """ + Args: + filename: The name of the current file. + lines: An array of strings, each representing a line of the file. + error: The function to call with any errors found. 
+ """ - # The array lines() was created by adding two newlines to the - # original file (go figure), then splitting on \n. - # To verify that the file ends in \n, we just have to make sure the - # last-but-two element of lines() exists and is empty. - if len(lines) < 3 or lines[-2]: - error( - filename, - len(lines) - 2, - "whitespace/ending_newline", - 5, - "Could not find a newline character at the end of the file.", - ) + # The array lines() was created by adding two newlines to the + # original file (go figure), then splitting on \n. + # To verify that the file ends in \n, we just have to make sure the + # last-but-two element of lines() exists and is empty. + if len(lines) < 3 or lines[-2]: + error(filename, len(lines) - 2, 'whitespace/ending_newline', 5, + 'Could not find a newline character at the end of the file.') def CheckForMultilineCommentsAndStrings(filename, clean_lines, linenum, error): - """Logs an error if we see /* ... */ or "..." that extend past one line. - - /* ... */ comments are legit inside macros, for one line. - Otherwise, we prefer // comments, so it's ok to warn about the - other. Likewise, it's ok for strings to extend across multiple - lines, as long as a line continuation character (backslash) - terminates each line. Although not currently prohibited by the C++ - style guide, it's ugly and unnecessary. We don't do well with either - in this lint program, so we warn about both. - - Args: - filename: The name of the current file. - clean_lines: A CleansedLines instance containing the file. - linenum: The number of the line to check. - error: The function to call with any errors found. - """ - line = clean_lines.elided[linenum] - - # Remove all \\ (escaped backslashes) from the line. They are OK, and the - # second (escaped) slash may trigger later \" detection erroneously. 
- line = line.replace("\\\\", "") - - if line.count("/*") > line.count("*/"): - error( - filename, - linenum, - "readability/multiline_comment", - 5, - "Complex multi-line /*...*/-style comment found. " - "Lint may give bogus warnings. " - "Consider replacing these with //-style comments, " - "with #if 0...#endif, " - "or with more clearly structured multi-line comments.", - ) - - if (line.count('"') - line.count('\\"')) % 2: - error( - filename, - linenum, - "readability/multiline_string", - 5, - 'Multi-line string ("...") found. This lint script doesn\'t ' - "do well with such strings, and may give bogus warnings. " - "Use C++11 raw strings or concatenation instead.", - ) + """Logs an error if we see /* ... */ or "..." that extend past one line. + + /* ... */ comments are legit inside macros, for one line. + Otherwise, we prefer // comments, so it's ok to warn about the + other. Likewise, it's ok for strings to extend across multiple + lines, as long as a line continuation character (backslash) + terminates each line. Although not currently prohibited by the C++ + style guide, it's ugly and unnecessary. We don't do well with either + in this lint program, so we warn about both. + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + error: The function to call with any errors found. + """ + line = clean_lines.elided[linenum] + + # Remove all \\ (escaped backslashes) from the line. They are OK, and the + # second (escaped) slash may trigger later \" detection erroneously. + line = line.replace('\\\\', '') + + if line.count('/*') > line.count('*/'): + error(filename, linenum, 'readability/multiline_comment', 5, + 'Complex multi-line /*...*/-style comment found. ' + 'Lint may give bogus warnings. 
' + 'Consider replacing these with //-style comments, ' + 'with #if 0...#endif, ' + 'or with more clearly structured multi-line comments.') + + if (line.count('"') - line.count('\\"')) % 2: + error(filename, linenum, 'readability/multiline_string', 5, + 'Multi-line string ("...") found. This lint script doesn\'t ' + 'do well with such strings, and may give bogus warnings. ' + 'Use C++11 raw strings or concatenation instead.') # (non-threadsafe name, thread-safe alternative, validation pattern) @@ -2486,5107 +2710,4268 @@ def CheckForMultilineCommentsAndStrings(filename, clean_lines, linenum, error): # in some expression context on the same line by matching on some # operator before the function name. This eliminates constructors and # member function calls. -_UNSAFE_FUNC_PREFIX = r"(?:[-+*/=%^&|(<]\s*|>\s+)" +_UNSAFE_FUNC_PREFIX = r'(?:[-+*/=%^&|(<]\s*|>\s+)' _THREADING_LIST = ( - ("asctime(", "asctime_r(", _UNSAFE_FUNC_PREFIX + r"asctime\([^)]+\)"), - ("ctime(", "ctime_r(", _UNSAFE_FUNC_PREFIX + r"ctime\([^)]+\)"), - ("getgrgid(", "getgrgid_r(", _UNSAFE_FUNC_PREFIX + r"getgrgid\([^)]+\)"), - ("getgrnam(", "getgrnam_r(", _UNSAFE_FUNC_PREFIX + r"getgrnam\([^)]+\)"), - ("getlogin(", "getlogin_r(", _UNSAFE_FUNC_PREFIX + r"getlogin\(\)"), - ("getpwnam(", "getpwnam_r(", _UNSAFE_FUNC_PREFIX + r"getpwnam\([^)]+\)"), - ("getpwuid(", "getpwuid_r(", _UNSAFE_FUNC_PREFIX + r"getpwuid\([^)]+\)"), - ("gmtime(", "gmtime_r(", _UNSAFE_FUNC_PREFIX + r"gmtime\([^)]+\)"), - ("localtime(", "localtime_r(", _UNSAFE_FUNC_PREFIX + r"localtime\([^)]+\)"), - ("rand(", "rand_r(", _UNSAFE_FUNC_PREFIX + r"rand\(\)"), - ("strtok(", "strtok_r(", _UNSAFE_FUNC_PREFIX + r"strtok\([^)]+\)"), - ("ttyname(", "ttyname_r(", _UNSAFE_FUNC_PREFIX + r"ttyname\([^)]+\)"), -) + ('asctime(', 'asctime_r(', _UNSAFE_FUNC_PREFIX + r'asctime\([^)]+\)'), + ('ctime(', 'ctime_r(', _UNSAFE_FUNC_PREFIX + r'ctime\([^)]+\)'), + ('getgrgid(', 'getgrgid_r(', _UNSAFE_FUNC_PREFIX + r'getgrgid\([^)]+\)'), + ('getgrnam(', 
'getgrnam_r(', _UNSAFE_FUNC_PREFIX + r'getgrnam\([^)]+\)'), + ('getlogin(', 'getlogin_r(', _UNSAFE_FUNC_PREFIX + r'getlogin\(\)'), + ('getpwnam(', 'getpwnam_r(', _UNSAFE_FUNC_PREFIX + r'getpwnam\([^)]+\)'), + ('getpwuid(', 'getpwuid_r(', _UNSAFE_FUNC_PREFIX + r'getpwuid\([^)]+\)'), + ('gmtime(', 'gmtime_r(', _UNSAFE_FUNC_PREFIX + r'gmtime\([^)]+\)'), + ('localtime(', 'localtime_r(', _UNSAFE_FUNC_PREFIX + r'localtime\([^)]+\)'), + ('rand(', 'rand_r(', _UNSAFE_FUNC_PREFIX + r'rand\(\)'), + ('strtok(', 'strtok_r(', + _UNSAFE_FUNC_PREFIX + r'strtok\([^)]+\)'), + ('ttyname(', 'ttyname_r(', _UNSAFE_FUNC_PREFIX + r'ttyname\([^)]+\)'), + ) def CheckPosixThreading(filename, clean_lines, linenum, error): - """Checks for calls to thread-unsafe functions. - - Much code has been originally written without consideration of - multi-threading. Also, engineers are relying on their old experience; - they have learned posix before threading extensions were added. These - tests guide the engineers to use thread-safe functions (when using - posix directly). - - Args: - filename: The name of the current file. - clean_lines: A CleansedLines instance containing the file. - linenum: The number of the line to check. - error: The function to call with any errors found. - """ - line = clean_lines.elided[linenum] - for single_thread_func, multithread_safe_func, pattern in _THREADING_LIST: - # Additional pattern matching check to confirm that this is the - # function we are looking for - if Search(pattern, line): - error( - filename, - linenum, - "runtime/threadsafe_fn", - 2, - "Consider using " - + multithread_safe_func - + "...) instead of " - + single_thread_func - + "...) for improved thread safety.", - ) + """Checks for calls to thread-unsafe functions. + + Much code has been originally written without consideration of + multi-threading. Also, engineers are relying on their old experience; + they have learned posix before threading extensions were added. 
These + tests guide the engineers to use thread-safe functions (when using + posix directly). + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + error: The function to call with any errors found. + """ + line = clean_lines.elided[linenum] + for single_thread_func, multithread_safe_func, pattern in _THREADING_LIST: + # Additional pattern matching check to confirm that this is the + # function we are looking for + if re.search(pattern, line): + error(filename, linenum, 'runtime/threadsafe_fn', 2, + 'Consider using ' + multithread_safe_func + + '...) instead of ' + single_thread_func + + '...) for improved thread safety.') def CheckVlogArguments(filename, clean_lines, linenum, error): - """Checks that VLOG() is only used for defining a logging level. - - For example, VLOG(2) is correct. VLOG(INFO), VLOG(WARNING), VLOG(ERROR), and - VLOG(FATAL) are not. - - Args: - filename: The name of the current file. - clean_lines: A CleansedLines instance containing the file. - linenum: The number of the line to check. - error: The function to call with any errors found. - """ - line = clean_lines.elided[linenum] - if Search(r"\bVLOG\((INFO|ERROR|WARNING|DFATAL|FATAL)\)", line): - error( - filename, - linenum, - "runtime/vlog", - 5, - "VLOG() should be used with numeric verbosity level. " - "Use LOG() if you want symbolic severity levels.", - ) - + """Checks that VLOG() is only used for defining a logging level. + + For example, VLOG(2) is correct. VLOG(INFO), VLOG(WARNING), VLOG(ERROR), and + VLOG(FATAL) are not. + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + error: The function to call with any errors found. 
+ """ + line = clean_lines.elided[linenum] + if re.search(r'\bVLOG\((INFO|ERROR|WARNING|DFATAL|FATAL)\)', line): + error(filename, linenum, 'runtime/vlog', 5, + 'VLOG() should be used with numeric verbosity level. ' + 'Use LOG() if you want symbolic severity levels.') # Matches invalid increment: *count++, which moves pointer instead of # incrementing a value. -_RE_PATTERN_INVALID_INCREMENT = re.compile(r"^\s*\*\w+(\+\+|--);") +_RE_PATTERN_INVALID_INCREMENT = re.compile( + r'^\s*\*\w+(\+\+|--);') def CheckInvalidIncrement(filename, clean_lines, linenum, error): - """Checks for invalid increment *count++. - - For example following function: - void increment_counter(int* count) { - *count++; - } - is invalid, because it effectively does count++, moving pointer, and should - be replaced with ++*count, (*count)++ or *count += 1. - - Args: - filename: The name of the current file. - clean_lines: A CleansedLines instance containing the file. - linenum: The number of the line to check. - error: The function to call with any errors found. - """ - line = clean_lines.elided[linenum] - if _RE_PATTERN_INVALID_INCREMENT.match(line): - error( - filename, - linenum, - "runtime/invalid_increment", - 5, - "Changing pointer instead of value (or unused value of operator*).", - ) + """Checks for invalid increment *count++. + + For example following function: + void increment_counter(int* count) { + *count++; + } + is invalid, because it effectively does count++, moving pointer, and should + be replaced with ++*count, (*count)++ or *count += 1. + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + error: The function to call with any errors found. 
+ """ + line = clean_lines.elided[linenum] + if _RE_PATTERN_INVALID_INCREMENT.match(line): + error(filename, linenum, 'runtime/invalid_increment', 5, + 'Changing pointer instead of value (or unused value of operator*).') def IsMacroDefinition(clean_lines, linenum): - if Search(r"^#define", clean_lines[linenum]): - return True + if re.search(r'^#define', clean_lines[linenum]): + return True - if linenum > 0 and Search(r"\\$", clean_lines[linenum - 1]): - return True + if linenum > 0 and re.search(r'\\$', clean_lines[linenum - 1]): + return True - return False + return False def IsForwardClassDeclaration(clean_lines, linenum): - return Match(r"^\s*(\btemplate\b)*.*class\s+\w+;\s*$", clean_lines[linenum]) + return re.match(r'^\s*(\btemplate\b)*.*class\s+\w+;\s*$', clean_lines[linenum]) class _BlockInfo(object): - """Stores information about a generic block of code.""" - - def __init__(self, linenum, seen_open_brace): - self.starting_linenum = linenum - self.seen_open_brace = seen_open_brace - self.open_parentheses = 0 - self.inline_asm = _NO_ASM - self.check_namespace_indentation = False - - def CheckBegin(self, filename, clean_lines, linenum, error): - """Run checks that applies to text up to the opening brace. - - This is mostly for checking the text after the class identifier - and the "{", usually where the base class is specified. For other - blocks, there isn't much to check, so we always pass. - - Args: - filename: The name of the current file. - clean_lines: A CleansedLines instance containing the file. - linenum: The number of the line to check. - error: The function to call with any errors found. - """ - pass + """Stores information about a generic block of code.""" - def CheckEnd(self, filename, clean_lines, linenum, error): - """Run checks that applies to text after the closing brace. 
+ def __init__(self, linenum, seen_open_brace): + self.starting_linenum = linenum + self.seen_open_brace = seen_open_brace + self.open_parentheses = 0 + self.inline_asm = _NO_ASM + self.check_namespace_indentation = False - This is mostly used for checking end of namespace comments. + def CheckBegin(self, filename, clean_lines, linenum, error): + """Run checks that applies to text up to the opening brace. - Args: - filename: The name of the current file. - clean_lines: A CleansedLines instance containing the file. - linenum: The number of the line to check. - error: The function to call with any errors found. - """ - pass + This is mostly for checking the text after the class identifier + and the "{", usually where the base class is specified. For other + blocks, there isn't much to check, so we always pass. + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + error: The function to call with any errors found. + """ + pass + + def CheckEnd(self, filename, clean_lines, linenum, error): + """Run checks that applies to text after the closing brace. + + This is mostly used for checking end of namespace comments. + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + error: The function to call with any errors found. + """ + pass - def IsBlockInfo(self): - """Returns true if this block is a _BlockInfo. + def IsBlockInfo(self): + """Returns true if this block is a _BlockInfo. - This is convenient for verifying that an object is an instance of - a _BlockInfo, but not an instance of any of the derived classes. + This is convenient for verifying that an object is an instance of + a _BlockInfo, but not an instance of any of the derived classes. - Returns: - True for this class, False for derived classes. 
- """ - return self.__class__ == _BlockInfo + Returns: + True for this class, False for derived classes. + """ + return self.__class__ == _BlockInfo class _ExternCInfo(_BlockInfo): - """Stores information about an 'extern "C"' block.""" + """Stores information about an 'extern "C"' block.""" - def __init__(self, linenum): - _BlockInfo.__init__(self, linenum, True) + def __init__(self, linenum): + _BlockInfo.__init__(self, linenum, True) class _ClassInfo(_BlockInfo): - """Stores information about a class.""" - - def __init__(self, name, class_or_struct, clean_lines, linenum): - _BlockInfo.__init__(self, linenum, False) - self.name = name - self.is_derived = False - self.check_namespace_indentation = True - if class_or_struct == "struct": - self.access = "public" - self.is_struct = True - else: - self.access = "private" - self.is_struct = False + """Stores information about a class.""" + + def __init__(self, name, class_or_struct, clean_lines, linenum): + _BlockInfo.__init__(self, linenum, False) + self.name = name + self.is_derived = False + self.check_namespace_indentation = True + if class_or_struct == 'struct': + self.access = 'public' + self.is_struct = True + else: + self.access = 'private' + self.is_struct = False - # Remember initial indentation level for this class. Using raw_lines here - # instead of elided to account for leading comments. - self.class_indent = GetIndentLevel(clean_lines.raw_lines[linenum]) + # Remember initial indentation level for this class. Using raw_lines here + # instead of elided to account for leading comments. + self.class_indent = GetIndentLevel(clean_lines.raw_lines[linenum]) - # Try to find the end of the class. This will be confused by things like: - # class A { - # } *x = { ... - # - # But it's still good enough for CheckSectionSpacing. 
- self.last_line = 0 - depth = 0 - for i in range(linenum, clean_lines.NumLines()): - line = clean_lines.elided[i] - depth += line.count("{") - line.count("}") - if not depth: - self.last_line = i - break - - def CheckBegin(self, filename, clean_lines, linenum, error): - # Look for a bare ':' - if Search("(^|[^:]):($|[^:])", clean_lines.elided[linenum]): - self.is_derived = True - - def CheckEnd(self, filename, clean_lines, linenum, error): - # If there is a DISALLOW macro, it should appear near the end of - # the class. - seen_last_thing_in_class = False - for i in xrange(linenum - 1, self.starting_linenum, -1): - match = Search( - r"\b(DISALLOW_COPY_AND_ASSIGN|DISALLOW_IMPLICIT_CONSTRUCTORS)\(" - + self.name - + r"\)", - clean_lines.elided[i], - ) - if match: - if seen_last_thing_in_class: - error( - filename, - i, - "readability/constructors", - 3, - match.group(1) + " should be the last thing in the class", - ) - break - - if not Match(r"^\s*$", clean_lines.elided[i]): - seen_last_thing_in_class = True - - # Check that closing brace is aligned with beginning of the class. - # Only do this if the closing brace is indented by only whitespaces. - # This means we will not check single-line class definitions. - indent = Match(r"^( *)\}", clean_lines.elided[linenum]) - if indent and len(indent.group(1)) != self.class_indent: - if self.is_struct: - parent = "struct " + self.name - else: - parent = "class " + self.name - error( - filename, - linenum, - "whitespace/indent", - 3, - "Closing brace should be aligned with beginning of %s" % parent, - ) + # Try to find the end of the class. This will be confused by things like: + # class A { + # } *x = { ... + # + # But it's still good enough for CheckSectionSpacing. 
+ self.last_line = 0 + depth = 0 + for i in range(linenum, clean_lines.NumLines()): + line = clean_lines.elided[i] + depth += line.count('{') - line.count('}') + if not depth: + self.last_line = i + break + + def CheckBegin(self, filename, clean_lines, linenum, error): + # Look for a bare ':' + if re.search('(^|[^:]):($|[^:])', clean_lines.elided[linenum]): + self.is_derived = True + + def CheckEnd(self, filename, clean_lines, linenum, error): + # If there is a DISALLOW macro, it should appear near the end of + # the class. + seen_last_thing_in_class = False + for i in range(linenum - 1, self.starting_linenum, -1): + match = re.search( + r'\b(DISALLOW_COPY_AND_ASSIGN|DISALLOW_IMPLICIT_CONSTRUCTORS)\(' + + self.name + r'\)', + clean_lines.elided[i]) + if match: + if seen_last_thing_in_class: + error(filename, i, 'readability/constructors', 3, + match.group(1) + ' should be the last thing in the class') + break + + if not re.match(r'^\s*$', clean_lines.elided[i]): + seen_last_thing_in_class = True + + # Check that closing brace is aligned with beginning of the class. + # Only do this if the closing brace is indented by only whitespaces. + # This means we will not check single-line class definitions. 
+ indent = re.match(r'^( *)\}', clean_lines.elided[linenum]) + if indent and len(indent.group(1)) != self.class_indent: + if self.is_struct: + parent = 'struct ' + self.name + else: + parent = 'class ' + self.name + error(filename, linenum, 'whitespace/indent', 3, + f'Closing brace should be aligned with beginning of {parent}') class _NamespaceInfo(_BlockInfo): - """Stores information about a namespace.""" + """Stores information about a namespace.""" - def __init__(self, name, linenum): - _BlockInfo.__init__(self, linenum, False) - self.name = name or "" - self.check_namespace_indentation = True + def __init__(self, name, linenum): + _BlockInfo.__init__(self, linenum, False) + self.name = name or '' + self.check_namespace_indentation = True - def CheckEnd(self, filename, clean_lines, linenum, error): - """Check end of namespace comments.""" - line = clean_lines.raw_lines[linenum] + def CheckEnd(self, filename, clean_lines, linenum, error): + """Check end of namespace comments.""" + line = clean_lines.raw_lines[linenum] - # Check how many lines is enclosed in this namespace. Don't issue - # warning for missing namespace comments if there aren't enough - # lines. However, do apply checks if there is already an end of - # namespace comment and it's incorrect. - # - # TODO(unknown): We always want to check end of namespace comments - # if a namespace is large, but sometimes we also want to apply the - # check if a short namespace contained nontrivial things (something - # other than forward declarations). There is currently no logic on - # deciding what these nontrivial things are, so this check is - # triggered by namespace size only, which works most of the time. - if linenum - self.starting_linenum < 10 and not Match( - r"^\s*};*\s*(//|/\*).*\bnamespace\b", line - ): - return - - # Look for matching comment at end of namespace. 
- # - # Note that we accept C style "/* */" comments for terminating - # namespaces, so that code that terminate namespaces inside - # preprocessor macros can be cpplint clean. - # - # We also accept stuff like "// end of namespace ." with the - # period at the end. - # - # Besides these, we don't accept anything else, otherwise we might - # get false negatives when existing comment is a substring of the - # expected namespace. - if self.name: - # Named namespace - if not Match( - ( - r"^\s*};*\s*(//|/\*).*\bnamespace\s+" - + re.escape(self.name) - + r"[\*/\.\\\s]*$" - ), - line, - ): - error( - filename, - linenum, - "readability/namespace", - 5, - 'Namespace should be terminated with "// namespace %s"' % self.name, - ) + # Check how many lines is enclosed in this namespace. Don't issue + # warning for missing namespace comments if there aren't enough + # lines. However, do apply checks if there is already an end of + # namespace comment and it's incorrect. + # + # TODO(unknown): We always want to check end of namespace comments + # if a namespace is large, but sometimes we also want to apply the + # check if a short namespace contained nontrivial things (something + # other than forward declarations). There is currently no logic on + # deciding what these nontrivial things are, so this check is + # triggered by namespace size only, which works most of the time. + if (linenum - self.starting_linenum < 10 + and not re.match(r'^\s*};*\s*(//|/\*).*\bnamespace\b', line)): + return + + # Look for matching comment at end of namespace. + # + # Note that we accept C style "/* */" comments for terminating + # namespaces, so that code that terminate namespaces inside + # preprocessor macros can be cpplint clean. + # + # We also accept stuff like "// end of namespace ." with the + # period at the end. + # + # Besides these, we don't accept anything else, otherwise we might + # get false negatives when existing comment is a substring of the + # expected namespace. 
+ if self.name: + # Named namespace + if not re.match((r'^\s*};*\s*(//|/\*).*\bnamespace\s+' + + re.escape(self.name) + r'[\*/\.\\\s]*$'), + line): + error(filename, linenum, 'readability/namespace', 5, + f'Namespace should be terminated with "// namespace {self.name}"') + else: + # Anonymous namespace + if not re.match(r'^\s*};*\s*(//|/\*).*\bnamespace[\*/\.\\\s]*$', line): + # If "// namespace anonymous" or "// anonymous namespace (more text)", + # mention "// anonymous namespace" as an acceptable form + if re.match(r'^\s*}.*\b(namespace anonymous|anonymous namespace)\b', line): + error(filename, linenum, 'readability/namespace', 5, + 'Anonymous namespace should be terminated with "// namespace"' + ' or "// anonymous namespace"') else: - # Anonymous namespace - if not Match(r"^\s*};*\s*(//|/\*).*\bnamespace[\*/\.\\\s]*$", line): - # If "// namespace anonymous" or "// anonymous namespace (more text)", - # mention "// anonymous namespace" as an acceptable form - if Match(r"^\s*}.*\b(namespace anonymous|anonymous namespace)\b", line): - error( - filename, - linenum, - "readability/namespace", - 5, - 'Anonymous namespace should be terminated with "// namespace"' - ' or "// anonymous namespace"', - ) - else: - error( - filename, - linenum, - "readability/namespace", - 5, - 'Anonymous namespace should be terminated with "// namespace"', - ) + error(filename, linenum, 'readability/namespace', 5, + 'Anonymous namespace should be terminated with "// namespace"') class _PreprocessorInfo(object): - """Stores checkpoints of nesting stacks when #if/#else is seen.""" + """Stores checkpoints of nesting stacks when #if/#else is seen.""" - def __init__(self, stack_before_if): - # The entire nesting stack before #if - self.stack_before_if = stack_before_if + def __init__(self, stack_before_if): + # The entire nesting stack before #if + self.stack_before_if = stack_before_if - # The entire nesting stack up to #else - self.stack_before_else = [] + # The entire nesting stack up to 
#else + self.stack_before_else = [] - # Whether we have already seen #else or #elif - self.seen_else = False + # Whether we have already seen #else or #elif + self.seen_else = False class NestingState(object): - """Holds states related to parsing braces.""" - - def __init__(self): - # Stack for tracking all braces. An object is pushed whenever we - # see a "{", and popped when we see a "}". Only 3 types of - # objects are possible: - # - _ClassInfo: a class or struct. - # - _NamespaceInfo: a namespace. - # - _BlockInfo: some other type of block. - self.stack = [] - - # Top of the previous stack before each Update(). - # - # Because the nesting_stack is updated at the end of each line, we - # had to do some convoluted checks to find out what is the current - # scope at the beginning of the line. This check is simplified by - # saving the previous top of nesting stack. - # - # We could save the full stack, but we only need the top. Copying - # the full nesting stack would slow down cpplint by ~10%. - self.previous_stack_top = [] - - # Stack of _PreprocessorInfo objects. - self.pp_stack = [] - - def SeenOpenBrace(self): - """Check if we have seen the opening brace for the innermost block. - - Returns: - True if we have seen the opening brace, False if the innermost - block is still expecting an opening brace. - """ - return (not self.stack) or self.stack[-1].seen_open_brace - - def InNamespaceBody(self): - """Check if we are currently one level inside a namespace body. - - Returns: - True if top of the stack is a namespace block, False otherwise. - """ - return self.stack and isinstance(self.stack[-1], _NamespaceInfo) - - def InExternC(self): - """Check if we are currently one level inside an 'extern "C"' block. - - Returns: - True if top of the stack is an extern block, False otherwise. - """ - return self.stack and isinstance(self.stack[-1], _ExternCInfo) - - def InClassDeclaration(self): - """Check if we are currently one level inside a class or struct declaration. 
- - Returns: - True if top of the stack is a class/struct, False otherwise. - """ - return self.stack and isinstance(self.stack[-1], _ClassInfo) - - def InAsmBlock(self): - """Check if we are currently one level inside an inline ASM block. - - Returns: - True if the top of the stack is a block containing inline ASM. - """ - return self.stack and self.stack[-1].inline_asm != _NO_ASM - - def InTemplateArgumentList(self, clean_lines, linenum, pos): - """Check if current position is inside template argument list. - - Args: - clean_lines: A CleansedLines instance containing the file. - linenum: The number of the line to check. - pos: position just after the suspected template argument. - Returns: - True if (linenum, pos) is inside template arguments. - """ - while linenum < clean_lines.NumLines(): - # Find the earliest character that might indicate a template argument - line = clean_lines.elided[linenum] - match = Match(r"^[^{};=\[\]\.<>]*(.)", line[pos:]) - if not match: - linenum += 1 - pos = 0 - continue - token = match.group(1) - pos += len(match.group(0)) - - # These things do not look like template argument list: - # class Suspect { - # class Suspect x; } - if token in ("{", "}", ";"): - return False - - # These things look like template argument list: - # template - # template - # template - # template - if token in (">", "=", "[", "]", "."): - return True - - # Check if token is an unmatched '<'. - # If not, move on to the next character. - if token != "<": - pos += 1 - if pos >= len(line): - linenum += 1 - pos = 0 - continue - - # We can't be sure if we just find a single '<', and need to - # find the matching '>'. - (_, end_line, end_pos) = CloseExpression(clean_lines, linenum, pos - 1) - if end_pos < 0: - # Not sure if template argument list or syntax error in file - return False - linenum = end_line - pos = end_pos - return False + """Holds states related to parsing braces.""" + + def __init__(self): + # Stack for tracking all braces. 
An object is pushed whenever we + # see a "{", and popped when we see a "}". Only 3 types of + # objects are possible: + # - _ClassInfo: a class or struct. + # - _NamespaceInfo: a namespace. + # - _BlockInfo: some other type of block. + self.stack = [] + + # Top of the previous stack before each Update(). + # + # Because the nesting_stack is updated at the end of each line, we + # had to do some convoluted checks to find out what is the current + # scope at the beginning of the line. This check is simplified by + # saving the previous top of nesting stack. + # + # We could save the full stack, but we only need the top. Copying + # the full nesting stack would slow down cpplint by ~10%. + self.previous_stack_top = [] - def UpdatePreprocessor(self, line): - """Update preprocessor stack. - - We need to handle preprocessors due to classes like this: - #ifdef SWIG - struct ResultDetailsPageElementExtensionPoint { - #else - struct ResultDetailsPageElementExtensionPoint : public Extension { - #endif - - We make the following assumptions (good enough for most files): - - Preprocessor condition evaluates to true from #if up to first - #else/#elif/#endif. - - - Preprocessor condition evaluates to false from #else/#elif up - to #endif. We still perform lint checks on these lines, but - these do not affect nesting stack. - - Args: - line: current line to check. - """ - if Match(r"^\s*#\s*(if|ifdef|ifndef)\b", line): - # Beginning of #if block, save the nesting stack here. The saved - # stack will allow us to restore the parsing state in the #else case. - self.pp_stack.append(_PreprocessorInfo(copy.deepcopy(self.stack))) - elif Match(r"^\s*#\s*(else|elif)\b", line): - # Beginning of #else block - if self.pp_stack: - if not self.pp_stack[-1].seen_else: - # This is the first #else or #elif block. Remember the - # whole nesting stack up to this point. This is what we - # keep after the #endif. 
- self.pp_stack[-1].seen_else = True - self.pp_stack[-1].stack_before_else = copy.deepcopy(self.stack) - - # Restore the stack to how it was before the #if - self.stack = copy.deepcopy(self.pp_stack[-1].stack_before_if) - else: - # TODO(unknown): unexpected #else, issue warning? - pass - elif Match(r"^\s*#\s*endif\b", line): - # End of #if or #else blocks. - if self.pp_stack: - # If we saw an #else, we will need to restore the nesting - # stack to its former state before the #else, otherwise we - # will just continue from where we left off. - if self.pp_stack[-1].seen_else: - # Here we can just use a shallow copy since we are the last - # reference to it. - self.stack = self.pp_stack[-1].stack_before_else - # Drop the corresponding #if - self.pp_stack.pop() - else: - # TODO(unknown): unexpected #endif, issue warning? - pass - - # TODO(unknown): Update() is too long, but we will refactor later. - def Update(self, filename, clean_lines, linenum, error): - """Update nesting state with current line. - - Args: - filename: The name of the current file. - clean_lines: A CleansedLines instance containing the file. - linenum: The number of the line to check. - error: The function to call with any errors found. - """ - line = clean_lines.elided[linenum] - - # Remember top of the previous nesting stack. - # - # The stack is always pushed/popped and not modified in place, so - # we can just do a shallow copy instead of copy.deepcopy. Using - # deepcopy would slow down cpplint by ~28%. - if self.stack: - self.previous_stack_top = self.stack[-1] - else: - self.previous_stack_top = None + # Stack of _PreprocessorInfo objects. + self.pp_stack = [] - # Update pp_stack - self.UpdatePreprocessor(line) + def SeenOpenBrace(self): + """Check if we have seen the opening brace for the innermost block. - # Count parentheses. This is to avoid adding struct arguments to - # the nesting stack. 
- if self.stack: - inner_block = self.stack[-1] - depth_change = line.count("(") - line.count(")") - inner_block.open_parentheses += depth_change - - # Also check if we are starting or ending an inline assembly block. - if inner_block.inline_asm in (_NO_ASM, _END_ASM): - if ( - depth_change != 0 - and inner_block.open_parentheses == 1 - and _MATCH_ASM.match(line) - ): - # Enter assembly block - inner_block.inline_asm = _INSIDE_ASM - else: - # Not entering assembly block. If previous line was _END_ASM, - # we will now shift to _NO_ASM state. - inner_block.inline_asm = _NO_ASM - elif ( - inner_block.inline_asm == _INSIDE_ASM - and inner_block.open_parentheses == 0 - ): - # Exit assembly block - inner_block.inline_asm = _END_ASM - - # Consume namespace declaration at the beginning of the line. Do - # this in a loop so that we catch same line declarations like this: - # namespace proto2 { namespace bridge { class MessageSet; } } - while True: - # Match start of namespace. The "\b\s*" below catches namespace - # declarations even if it weren't followed by a whitespace, this - # is so that we don't confuse our namespace checker. The - # missing spaces will be flagged by CheckSpacing. - namespace_decl_match = Match(r"^\s*namespace\b\s*([:\w]+)?(.*)$", line) - if not namespace_decl_match: - break - - new_namespace = _NamespaceInfo(namespace_decl_match.group(1), linenum) - self.stack.append(new_namespace) - - line = namespace_decl_match.group(2) - if line.find("{") != -1: - new_namespace.seen_open_brace = True - line = line[line.find("{") + 1 :] - - # Look for a class declaration in whatever is left of the line - # after parsing namespaces. The regexp accounts for decorated classes - # such as in: - # class LOCKABLE API Object { - # }; - class_decl_match = Match( - r"^(\s*(?:template\s*<[\w\s<>,:=]*>\s*)?" 
- r"(class|struct)\s+(?:[A-Z_]+\s+)*(\w+(?:::\w+)*))" - r"(.*)$", - line, - ) - if class_decl_match and ( - not self.stack or self.stack[-1].open_parentheses == 0 - ): - # We do not want to accept classes that are actually template arguments: - # template , - # template class Ignore3> - # void Function() {}; - # - # To avoid template argument cases, we scan forward and look for - # an unmatched '>'. If we see one, assume we are inside a - # template argument list. - end_declaration = len(class_decl_match.group(1)) - if not self.InTemplateArgumentList(clean_lines, linenum, end_declaration): - self.stack.append( - _ClassInfo( - class_decl_match.group(3), - class_decl_match.group(2), - clean_lines, - linenum, - ) - ) - line = class_decl_match.group(4) - - # If we have not yet seen the opening brace for the innermost block, - # run checks here. - if not self.SeenOpenBrace(): - self.stack[-1].CheckBegin(filename, clean_lines, linenum, error) - - # Update access control if we are inside a class/struct - if self.stack and isinstance(self.stack[-1], _ClassInfo): - classinfo = self.stack[-1] - access_match = Match( - r"^(.*)\b(public|private|protected|signals)(\s+(?:slots\s*)?)?" - r":(?:[^:]|$)", - line, - ) - if access_match: - classinfo.access = access_match.group(2) - - # Check that access keywords are indented +1 space. Skip this - # check if the keywords are not preceded by whitespaces. - indent = access_match.group(1) - if len(indent) != classinfo.class_indent + 1 and Match( - r"^\s*$", indent - ): - if classinfo.is_struct: - parent = "struct " + classinfo.name - else: - parent = "class " + classinfo.name - slots = "" - if access_match.group(3): - slots = access_match.group(3) - error( - filename, - linenum, - "whitespace/indent", - 3, - "%s%s: should be indented +1 space inside %s" - % (access_match.group(2), slots, parent), - ) - - # Consume braces or semicolons from what's left of the line - while True: - # Match first brace, semicolon, or closed parenthesis. 
- matched = Match(r"^[^{;)}]*([{;)}])(.*)$", line) - if not matched: - break - - token = matched.group(1) - if token == "{": - # If namespace or class hasn't seen a opening brace yet, mark - # namespace/class head as complete. Push a new block onto the - # stack otherwise. - if not self.SeenOpenBrace(): - self.stack[-1].seen_open_brace = True - elif Match(r'^extern\s*"[^"]*"\s*\{', line): - self.stack.append(_ExternCInfo(linenum)) - else: - self.stack.append(_BlockInfo(linenum, True)) - if _MATCH_ASM.match(line): - self.stack[-1].inline_asm = _BLOCK_ASM - - elif token == ";" or token == ")": - # If we haven't seen an opening brace yet, but we already saw - # a semicolon, this is probably a forward declaration. Pop - # the stack for these. - # - # Similarly, if we haven't seen an opening brace yet, but we - # already saw a closing parenthesis, then these are probably - # function arguments with extra "class" or "struct" keywords. - # Also pop these stack for these. - if not self.SeenOpenBrace(): - self.stack.pop() - else: # token == '}' - # Perform end of block checks and pop the stack. - if self.stack: - self.stack[-1].CheckEnd(filename, clean_lines, linenum, error) - self.stack.pop() - line = matched.group(2) - - def InnermostClass(self): - """Get class info on the top of the stack. - - Returns: - A _ClassInfo object if we are inside a class, or None otherwise. - """ - for i in range(len(self.stack), 0, -1): - classinfo = self.stack[i - 1] - if isinstance(classinfo, _ClassInfo): - return classinfo - return None + Returns: + True if we have seen the opening brace, False if the innermost + block is still expecting an opening brace. + """ + return (not self.stack) or self.stack[-1].seen_open_brace - def CheckCompletedBlocks(self, filename, error): - """Checks that all classes and namespaces have been completely parsed. - - Call this when all lines in a file have been processed. - Args: - filename: The name of the current file. 
- error: The function to call with any errors found. - """ - # Note: This test can result in false positives if #ifdef constructs - # get in the way of brace matching. See the testBuildClass test in - # cpplint_unittest.py for an example of this. - for obj in self.stack: - if isinstance(obj, _ClassInfo): - error( - filename, - obj.starting_linenum, - "build/class", - 5, - "Failed to find complete declaration of class %s" % obj.name, - ) - elif isinstance(obj, _NamespaceInfo): - error( - filename, - obj.starting_linenum, - "build/namespaces", - 5, - "Failed to find complete declaration of namespace %s" % obj.name, - ) - - -def CheckForNonStandardConstructs(filename, clean_lines, linenum, nesting_state, error): - r"""Logs an error if we see certain non-ANSI constructs ignored by gcc-2. - - Complain about several constructs which gcc-2 accepts, but which are - not standard C++. Warning about these in lint is one way to ease the - transition to new compilers. - - put storage class first (e.g. "static const" instead of "const static"). - - "%lld" instead of %qd" in printf-type functions. - - "%1$d" is non-standard in printf-type functions. - - "\%" is an undefined character escape sequence. - - text after #endif is not allowed. - - invalid inner-style forward declaration. - - >? and ?= and )\?=?\s*(\w+|[+-]?\d+)(\.\d*)?", line): - error( - filename, - linenum, - "build/deprecated", - 3, - ">? and ))?' - # r'\s*const\s*' + type_name + '\s*&\s*\w+\s*;' - error( - filename, - linenum, - "runtime/member_string_references", - 2, - "const string& members are dangerous. It is much better to use " - "alternatives, such as pointers or simple constants.", - ) - - # Everything else in this function operates on class declarations. - # Return early if the top of the nesting stack is not a class, or if - # the class head is not completed yet. 
- classinfo = nesting_state.InnermostClass() - if not classinfo or not classinfo.seen_open_brace: - return + Returns: + True if top of the stack is an extern block, False otherwise. + """ + return self.stack and isinstance(self.stack[-1], _ExternCInfo) - # The class may have been declared with namespace or classname qualifiers. - # The constructor and destructor will not have those qualifiers. - base_classname = classinfo.name.split("::")[-1] - - # Look for single-argument constructors that aren't marked explicit. - # Technically a valid construct, but against style. - explicit_constructor_match = Match( - r"\s+(?:(?:inline|constexpr)\s+)*(explicit\s+)?" - r"(?:(?:inline|constexpr)\s+)*%s\s*" - r"\(((?:[^()]|\([^()]*\))*)\)" % re.escape(base_classname), - line, - ) + def InClassDeclaration(self): + """Check if we are currently one level inside a class or struct declaration. - if explicit_constructor_match: - is_marked_explicit = explicit_constructor_match.group(1) + Returns: + True if top of the stack is a class/struct, False otherwise. + """ + return self.stack and isinstance(self.stack[-1], _ClassInfo) - if not explicit_constructor_match.group(2): - constructor_args = [] - else: - constructor_args = explicit_constructor_match.group(2).split(",") - - # collapse arguments so that commas in template parameter lists and function - # argument parameter lists don't split arguments in two - i = 0 - while i < len(constructor_args): - constructor_arg = constructor_args[i] - while constructor_arg.count("<") > constructor_arg.count( - ">" - ) or constructor_arg.count("(") > constructor_arg.count(")"): - constructor_arg += "," + constructor_args[i + 1] - del constructor_args[i + 1] - constructor_args[i] = constructor_arg - i += 1 - - variadic_args = [arg for arg in constructor_args if "&&..." 
in arg] - defaulted_args = [arg for arg in constructor_args if "=" in arg] - noarg_constructor = ( - not constructor_args - or # empty arg list - # 'void' arg specifier - (len(constructor_args) == 1 and constructor_args[0].strip() == "void") - ) - onearg_constructor = ( - (len(constructor_args) == 1 and not noarg_constructor) # exactly one arg - or - # all but at most one arg defaulted - ( - len(constructor_args) >= 1 - and not noarg_constructor - and len(defaulted_args) >= len(constructor_args) - 1 - ) - or - # variadic arguments with zero or one argument - (len(constructor_args) <= 2 and len(variadic_args) >= 1) - ) - initializer_list_constructor = bool( - onearg_constructor - and Search(r"\bstd\s*::\s*initializer_list\b", constructor_args[0]) - ) - copy_constructor = bool( - onearg_constructor - and Match( - r"((const\s+(volatile\s+)?)?|(volatile\s+(const\s+)?))?" - r"%s(\s*<[^>]*>)?(\s+const)?\s*(?:<\w+>\s*)?&" - % re.escape(base_classname), - constructor_args[0].strip(), - ) - ) - - if ( - not is_marked_explicit - and onearg_constructor - and not initializer_list_constructor - and not copy_constructor - ): - if defaulted_args or variadic_args: - error( - filename, - linenum, - "runtime/explicit", - 5, - "Constructors callable with one argument " - "should be marked explicit.", - ) - else: - error( - filename, - linenum, - "runtime/explicit", - 5, - "Single-parameter constructors should be marked explicit.", - ) - elif is_marked_explicit and not onearg_constructor: - if noarg_constructor: - error( - filename, - linenum, - "runtime/explicit", - 5, - "Zero-parameter constructors should not be marked explicit.", - ) + def InAsmBlock(self): + """Check if we are currently one level inside an inline ASM block. + Returns: + True if the top of the stack is a block containing inline ASM. 
+ """ + return self.stack and self.stack[-1].inline_asm != _NO_ASM -def CheckSpacingForFunctionCall(filename, clean_lines, linenum, error): - """Checks for the correctness of various spacing around function calls. + def InTemplateArgumentList(self, clean_lines, linenum, pos): + """Check if current position is inside template argument list. Args: - filename: The name of the current file. clean_lines: A CleansedLines instance containing the file. linenum: The number of the line to check. - error: The function to call with any errors found. + pos: position just after the suspected template argument. + Returns: + True if (linenum, pos) is inside template arguments. """ - line = clean_lines.elided[linenum] - - # Since function calls often occur inside if/for/while/switch - # expressions - which have their own, more liberal conventions - we - # first see if we should be looking inside such an expression for a - # function call, to which we can apply more strict standards. - fncall = line # if there's no control flow construct, look at whole line - for pattern in ( - r"\bif\s*\((.*)\)\s*{", - r"\bfor\s*\((.*)\)\s*{", - r"\bwhile\s*\((.*)\)\s*[{;]", - r"\bswitch\s*\((.*)\)\s*{", - ): - match = Search(pattern, line) - if match: - fncall = match.group(1) # look inside the parens for function calls - break + while linenum < clean_lines.NumLines(): + # Find the earliest character that might indicate a template argument + line = clean_lines.elided[linenum] + match = re.match(r'^[^{};=\[\]\.<>]*(.)', line[pos:]) + if not match: + linenum += 1 + pos = 0 + continue + token = match.group(1) + pos += len(match.group(0)) + + # These things do not look like template argument list: + # class Suspect { + # class Suspect x; } + if token in ('{', '}', ';'): return False + + # These things look like template argument list: + # template + # template + # template + # template + if token in ('>', '=', '[', ']', '.'): return True + + # Check if token is an unmatched '<'. 
+ # If not, move on to the next character. + if token != '<': + pos += 1 + if pos >= len(line): + linenum += 1 + pos = 0 + continue + + # We can't be sure if we just find a single '<', and need to + # find the matching '>'. + (_, end_line, end_pos) = CloseExpression(clean_lines, linenum, pos - 1) + if end_pos < 0: + # Not sure if template argument list or syntax error in file + return False + linenum = end_line + pos = end_pos + return False - # Except in if/for/while/switch, there should never be space - # immediately inside parens (eg "f( 3, 4 )"). We make an exception - # for nested parens ( (a+b) + c ). Likewise, there should never be - # a space before a ( when it's a function argument. I assume it's a - # function argument when the char before the whitespace is legal in - # a function name (alnum + _) and we're not starting a macro. Also ignore - # pointers and references to arrays and functions coz they're too tricky: - # we use a very simple way to recognize these: - # " (something)(maybe-something)" or - # " (something)(maybe-something," or - # " (something)[something]" - # Note that we assume the contents of [] to be short enough that - # they'll never need to wrap. - if ( # Ignore control structures. - not Search(r"\b(if|for|while|switch|return|new|delete|catch|sizeof)\b", fncall) - and - # Ignore pointers/references to functions. - not Search(r" \([^)]+\)\([^)]*(\)|,$)", fncall) - and - # Ignore pointers/references to arrays. 
- not Search(r" \([^)]+\)\[[^\]]+\]", fncall) - ): - if Search(r"\w\s*\(\s(?!\s*\\$)", fncall): # a ( used for a fn call - error( - filename, - linenum, - "whitespace/parens", - 4, - "Extra space after ( in function call", - ) - elif Search(r"\(\s+(?!(\s*\\)|\()", fncall): - error(filename, linenum, "whitespace/parens", 2, "Extra space after (") - if ( - Search(r"\w\s+\(", fncall) - and not Search(r"_{0,2}asm_{0,2}\s+_{0,2}volatile_{0,2}\s+\(", fncall) - and not Search(r"#\s*define|typedef|using\s+\w+\s*=", fncall) - and not Search(r"\w\s+\((\w+::)*\*\w+\)\(", fncall) - and not Search(r"\bcase\s+\(", fncall) - ): - # TODO(unknown): Space after an operator function seem to be a common - # error, silence those for now by restricting them to highest verbosity. - if Search(r"\boperator_*\b", line): - error( - filename, - linenum, - "whitespace/parens", - 0, - "Extra space before ( in function call", - ) - else: - error( - filename, - linenum, - "whitespace/parens", - 4, - "Extra space before ( in function call", - ) - # If the ) is followed only by a newline or a { + newline, assume it's - # part of a control statement (if/while/etc), and don't complain - if Search(r"[^)]\s+\)\s*[^{\s]", fncall): - # If the closing parenthesis is preceded by only whitespaces, - # try to give a more descriptive error message. - if Search(r"^\s+\)", fncall): - error( - filename, - linenum, - "whitespace/parens", - 2, - "Closing ) should be moved to the previous line", - ) - else: - error(filename, linenum, "whitespace/parens", 2, "Extra space before )") + def UpdatePreprocessor(self, line): + """Update preprocessor stack. + We need to handle preprocessors due to classes like this: + #ifdef SWIG + struct ResultDetailsPageElementExtensionPoint { + #else + struct ResultDetailsPageElementExtensionPoint : public Extension { + #endif -def IsBlankLine(line): - """Returns true if the given line is blank. 
+ We make the following assumptions (good enough for most files): + - Preprocessor condition evaluates to true from #if up to first + #else/#elif/#endif. - We consider a line to be blank if the line is empty or consists of - only white spaces. + - Preprocessor condition evaluates to false from #else/#elif up + to #endif. We still perform lint checks on these lines, but + these do not affect nesting stack. Args: - line: A line of a string. - - Returns: - True, if the given line is blank. + line: current line to check. """ - return not line or line.isspace() - - -def CheckForNamespaceIndentation(filename, nesting_state, clean_lines, line, error): - is_namespace_indent_item = ( - len(nesting_state.stack) > 1 - and nesting_state.stack[-1].check_namespace_indentation - and isinstance(nesting_state.previous_stack_top, _NamespaceInfo) - and nesting_state.previous_stack_top == nesting_state.stack[-2] - ) - - if ShouldCheckNamespaceIndentation( - nesting_state, is_namespace_indent_item, clean_lines.elided, line - ): - CheckItemIndentationInNamespace(filename, clean_lines.elided, line, error) - - -def CheckForFunctionLengths(filename, clean_lines, linenum, function_state, error): - """Reports for long function bodies. - - For an overview why this is done, see: - https://google-styleguide.googlecode.com/svn/trunk/cppguide.xml#Write_Short_Functions + if re.match(r'^\s*#\s*(if|ifdef|ifndef)\b', line): + # Beginning of #if block, save the nesting stack here. The saved + # stack will allow us to restore the parsing state in the #else case. + self.pp_stack.append(_PreprocessorInfo(copy.deepcopy(self.stack))) + elif re.match(r'^\s*#\s*(else|elif)\b', line): + # Beginning of #else block + if self.pp_stack: + if not self.pp_stack[-1].seen_else: + # This is the first #else or #elif block. Remember the + # whole nesting stack up to this point. This is what we + # keep after the #endif. 
+ self.pp_stack[-1].seen_else = True + self.pp_stack[-1].stack_before_else = copy.deepcopy(self.stack) + + # Restore the stack to how it was before the #if + self.stack = copy.deepcopy(self.pp_stack[-1].stack_before_if) + else: + # TODO(unknown): unexpected #else, issue warning? + pass + elif re.match(r'^\s*#\s*endif\b', line): + # End of #if or #else blocks. + if self.pp_stack: + # If we saw an #else, we will need to restore the nesting + # stack to its former state before the #else, otherwise we + # will just continue from where we left off. + if self.pp_stack[-1].seen_else: + # Here we can just use a shallow copy since we are the last + # reference to it. + self.stack = self.pp_stack[-1].stack_before_else + # Drop the corresponding #if + self.pp_stack.pop() + else: + # TODO(unknown): unexpected #endif, issue warning? + pass - Uses a simplistic algorithm assuming other style guidelines - (especially spacing) are followed. - Only checks unindented functions, so class members are unchecked. - Trivial bodies are unchecked, so constructors with huge initializer lists - may be missed. - Blank/comment lines are not counted so as to avoid encouraging the removal - of vertical space and comments just to get through a lint check. - NOLINT *on the last line of a function* disables this check. + # TODO(unknown): Update() is too long, but we will refactor later. + def Update(self, filename, clean_lines, linenum, error): + """Update nesting state with current line. Args: filename: The name of the current file. clean_lines: A CleansedLines instance containing the file. linenum: The number of the line to check. - function_state: Current function name and lines in body so far. error: The function to call with any errors found. """ - lines = clean_lines.lines - line = lines[linenum] - joined_line = "" - - starting_func = False - regexp = r"(\w(\w|::|\*|\&|\s)*)\(" # decls * & space::name( ... 
- match_result = Match(regexp, line) - if match_result: - # If the name is all caps and underscores, figure it's a macro and - # ignore it, unless it's TEST or TEST_F. - function_name = match_result.group(1).split()[-1] - if ( - function_name == "TEST" - or function_name == "TEST_F" - or (not Match(r"[A-Z_]+$", function_name)) - ): - starting_func = True - - if starting_func: - body_found = False - for start_linenum in xrange(linenum, clean_lines.NumLines()): - start_line = lines[start_linenum] - joined_line += " " + start_line.lstrip() - if Search(r"(;|})", start_line): # Declarations and trivial functions - body_found = True - break # ... ignore - if Search(r"{", start_line): - body_found = True - function = Search(r"((\w|:)*)\(", line).group(1) - if Match(r"TEST", function): # Handle TEST... macros - parameter_regexp = Search(r"(\(.*\))", joined_line) - if parameter_regexp: # Ignore bad syntax - function += parameter_regexp.group(1) - else: - function += "()" - function_state.Begin(function) - break - if not body_found: - # No body for the function (or evidence of a non-function) was found. - error( - filename, - linenum, - "readability/fn_size", - 5, - "Lint failed to find start of function body.", - ) - elif Match(r"^\}\s*$", line): # function end - function_state.Check(error, filename, linenum) - function_state.End() - elif not Match(r"^\s*$", line): - function_state.Count() # Count non-blank/non-comment lines. - - -_RE_PATTERN_TODO = re.compile(r"^//(\s*)TODO(\(.+?\))?:?(\s|$)?") + line = clean_lines.elided[linenum] + # Remember top of the previous nesting stack. + # + # The stack is always pushed/popped and not modified in place, so + # we can just do a shallow copy instead of copy.deepcopy. Using + # deepcopy would slow down cpplint by ~28%. + if self.stack: + self.previous_stack_top = self.stack[-1] + else: + self.previous_stack_top = None + + # Update pp_stack + self.UpdatePreprocessor(line) + + # Count parentheses. 
This is to avoid adding struct arguments to + # the nesting stack. + if self.stack: + inner_block = self.stack[-1] + depth_change = line.count('(') - line.count(')') + inner_block.open_parentheses += depth_change + + # Also check if we are starting or ending an inline assembly block. + if inner_block.inline_asm in (_NO_ASM, _END_ASM): + if (depth_change != 0 and + inner_block.open_parentheses == 1 and + _MATCH_ASM.match(line)): + # Enter assembly block + inner_block.inline_asm = _INSIDE_ASM + else: + # Not entering assembly block. If previous line was _END_ASM, + # we will now shift to _NO_ASM state. + inner_block.inline_asm = _NO_ASM + elif (inner_block.inline_asm == _INSIDE_ASM and + inner_block.open_parentheses == 0): + # Exit assembly block + inner_block.inline_asm = _END_ASM + + # Consume namespace declaration at the beginning of the line. Do + # this in a loop so that we catch same line declarations like this: + # namespace proto2 { namespace bridge { class MessageSet; } } + while True: + # Match start of namespace. The "\b\s*" below catches namespace + # declarations even if it weren't followed by a whitespace, this + # is so that we don't confuse our namespace checker. The + # missing spaces will be flagged by CheckSpacing. + namespace_decl_match = re.match(r'^\s*namespace\b\s*([:\w]+)?(.*)$', line) + if not namespace_decl_match: + break + + new_namespace = _NamespaceInfo(namespace_decl_match.group(1), linenum) + self.stack.append(new_namespace) + + line = namespace_decl_match.group(2) + if line.find('{') != -1: + new_namespace.seen_open_brace = True + line = line[line.find('{') + 1:] + + # Look for a class declaration in whatever is left of the line + # after parsing namespaces. The regexp accounts for decorated classes + # such as in: + # class LOCKABLE API Object { + # }; + class_decl_match = re.match( + r'^(\s*(?:template\s*<[\w\s<>,:=]*>\s*)?' 
+ r'(class|struct)\s+(?:[a-zA-Z0-9_]+\s+)*(\w+(?:::\w+)*))' + r'(.*)$', line) + if (class_decl_match and + (not self.stack or self.stack[-1].open_parentheses == 0)): + # We do not want to accept classes that are actually template arguments: + # template , + # template class Ignore3> + # void Function() {}; + # + # To avoid template argument cases, we scan forward and look for + # an unmatched '>'. If we see one, assume we are inside a + # template argument list. + end_declaration = len(class_decl_match.group(1)) + if not self.InTemplateArgumentList(clean_lines, linenum, end_declaration): + self.stack.append(_ClassInfo( + class_decl_match.group(3), class_decl_match.group(2), + clean_lines, linenum)) + line = class_decl_match.group(4) + + # If we have not yet seen the opening brace for the innermost block, + # run checks here. + if not self.SeenOpenBrace(): + self.stack[-1].CheckBegin(filename, clean_lines, linenum, error) + + # Update access control if we are inside a class/struct + if self.stack and isinstance(self.stack[-1], _ClassInfo): + classinfo = self.stack[-1] + access_match = re.match( + r'^(.*)\b(public|private|protected|signals)(\s+(?:slots\s*)?)?' + r':(?:[^:]|$)', + line) + if access_match: + classinfo.access = access_match.group(2) + + # Check that access keywords are indented +1 space. Skip this + # check if the keywords are not preceded by whitespaces. + indent = access_match.group(1) + if (len(indent) != classinfo.class_indent + 1 and + re.match(r'^\s*$', indent)): + if classinfo.is_struct: + parent = 'struct ' + classinfo.name + else: + parent = 'class ' + classinfo.name + slots = '' + if access_match.group(3): + slots = access_match.group(3) + error(filename, linenum, 'whitespace/indent', 3, + f'{access_match.group(2)}{slots}:' + f' should be indented +1 space inside {parent}') + + # Consume braces or semicolons from what's left of the line + while True: + # Match first brace, semicolon, or closed parenthesis. 
+ matched = re.match(r'^[^{;)}]*([{;)}])(.*)$', line) + if not matched: + break + + token = matched.group(1) + if token == '{': + # If namespace or class hasn't seen a opening brace yet, mark + # namespace/class head as complete. Push a new block onto the + # stack otherwise. + if not self.SeenOpenBrace(): + self.stack[-1].seen_open_brace = True + elif re.match(r'^extern\s*"[^"]*"\s*\{', line): + self.stack.append(_ExternCInfo(linenum)) + else: + self.stack.append(_BlockInfo(linenum, True)) + if _MATCH_ASM.match(line): + self.stack[-1].inline_asm = _BLOCK_ASM + + elif token == ';' or token == ')': + # If we haven't seen an opening brace yet, but we already saw + # a semicolon, this is probably a forward declaration. Pop + # the stack for these. + # + # Similarly, if we haven't seen an opening brace yet, but we + # already saw a closing parenthesis, then these are probably + # function arguments with extra "class" or "struct" keywords. + # Also pop these stack for these. + if not self.SeenOpenBrace(): + self.stack.pop() + else: # token == '}' + # Perform end of block checks and pop the stack. + if self.stack: + self.stack[-1].CheckEnd(filename, clean_lines, linenum, error) + self.stack.pop() + line = matched.group(2) -def CheckComment(line, filename, linenum, next_line_start, error): - """Checks for common mistakes in comments. + def InnermostClass(self): + """Get class info on the top of the stack. - Args: - line: The line in question. - filename: The name of the current file. - linenum: The number of the line to check. - next_line_start: The first non-whitespace column of the next line. - error: The function to call with any errors found. + Returns: + A _ClassInfo object if we are inside a class, or None otherwise. """ - commentpos = line.find("//") - if commentpos != -1: - # Check if the // may be in quotes. 
If so, ignore it - if re.sub(r"\\.", "", line[0:commentpos]).count('"') % 2 == 0: - # Allow one space for new scopes, two spaces otherwise: - if not (Match(r"^.*{ *//", line) and next_line_start == commentpos) and ( - (commentpos >= 1 and line[commentpos - 1] not in string.whitespace) - or (commentpos >= 2 and line[commentpos - 2] not in string.whitespace) - ): - error( - filename, - linenum, - "whitespace/comments", - 2, - "At least two spaces is best between code and comments", - ) - - # Checks for common mistakes in TODO comments. - comment = line[commentpos:] - match = _RE_PATTERN_TODO.match(comment) - if match: - # One whitespace is correct; zero whitespace is handled elsewhere. - leading_whitespace = match.group(1) - if len(leading_whitespace) > 1: - error( - filename, - linenum, - "whitespace/todo", - 2, - "Too many spaces before TODO", - ) - - username = match.group(2) - if not username: - error( - filename, - linenum, - "readability/todo", - 2, - "Missing username in TODO; it should look like " - '"// TODO(my_username): Stuff."', - ) - - middle_whitespace = match.group(3) - # Comparisons made explicit for correctness -- pylint: disable=g-explicit-bool-comparison - if middle_whitespace != " " and middle_whitespace != "": - error( - filename, - linenum, - "whitespace/todo", - 2, - "TODO(my_username) should be followed by a space", - ) - - # If the comment contains an alphanumeric character, there - # should be a space somewhere between it and the // unless - # it's a /// or //! Doxygen comment. - if Match(r"//[^ ]*\w", comment) and not Match( - r"(///|//\!)(\s+|$)", comment - ): - error( - filename, - linenum, - "whitespace/comments", - 4, - "Should have a space between // and comment", - ) - - -def CheckSpacing(filename, clean_lines, linenum, nesting_state, error): - """Checks for the correctness of various spacing issues in the code. 
+ for i in range(len(self.stack), 0, -1): + classinfo = self.stack[i - 1] + if isinstance(classinfo, _ClassInfo): + return classinfo + return None + +def CheckForNonStandardConstructs(filename, clean_lines, linenum, + nesting_state, error): + r"""Logs an error if we see certain non-ANSI constructs ignored by gcc-2. + + Complain about several constructs which gcc-2 accepts, but which are + not standard C++. Warning about these in lint is one way to ease the + transition to new compilers. + - put storage class first (e.g. "static const" instead of "const static"). + - "%lld" instead of %qd" in printf-type functions. + - "%1$d" is non-standard in printf-type functions. + - "\%" is an undefined character escape sequence. + - text after #endif is not allowed. + - invalid inner-style forward declaration. + - >? and ?= and )\?=?\s*(\w+|[+-]?\d+)(\.\d*)?', + line): + error(filename, linenum, 'build/deprecated', 3, + '>? and ))?' + # r'\s*const\s*' + type_name + '\s*&\s*\w+\s*;' + error(filename, linenum, 'runtime/member_string_references', 2, + 'const string& members are dangerous. It is much better to use ' + 'alternatives, such as pointers or simple constants.') + + # Everything else in this function operates on class declarations. + # Return early if the top of the nesting stack is not a class, or if + # the class head is not completed yet. + classinfo = nesting_state.InnermostClass() + if not classinfo or not classinfo.seen_open_brace: + return + + # The class may have been declared with namespace or classname qualifiers. + # The constructor and destructor will not have those qualifiers. + base_classname = classinfo.name.split('::')[-1] + + # Look for single-argument constructors that aren't marked explicit. + # Technically a valid construct, but against style. + explicit_constructor_match = re.match( + r'\s+(?:(?:inline|constexpr)\s+)*(explicit\s+)?' 
+ rf'(?:(?:inline|constexpr)\s+)*{re.escape(base_classname)}\s*' + r'\(((?:[^()]|\([^()]*\))*)\)', line) + + if explicit_constructor_match: + is_marked_explicit = explicit_constructor_match.group(1) + + if not explicit_constructor_match.group(2): + constructor_args = [] + else: + constructor_args = explicit_constructor_match.group(2).split(',') + + # collapse arguments so that commas in template parameter lists and function + # argument parameter lists don't split arguments in two + i = 0 + while i < len(constructor_args): + constructor_arg = constructor_args[i] + while (constructor_arg.count('<') > constructor_arg.count('>') or + constructor_arg.count('(') > constructor_arg.count(')')): + constructor_arg += ',' + constructor_args[i + 1] + del constructor_args[i + 1] + constructor_args[i] = constructor_arg + i += 1 + + variadic_args = [arg for arg in constructor_args if '&&...' in arg] + defaulted_args = [arg for arg in constructor_args if '=' in arg] + noarg_constructor = (not constructor_args or # empty arg list + # 'void' arg specifier + (len(constructor_args) == 1 and + constructor_args[0].strip() == 'void')) + onearg_constructor = ((len(constructor_args) == 1 and # exactly one arg + not noarg_constructor) or + # all but at most one arg defaulted + (len(constructor_args) >= 1 and + not noarg_constructor and + len(defaulted_args) >= len(constructor_args) - 1) or + # variadic arguments with zero or one argument + (len(constructor_args) <= 2 and + len(variadic_args) >= 1)) + initializer_list_constructor = bool( + onearg_constructor and + re.search(r'\bstd\s*::\s*initializer_list\b', constructor_args[0])) + copy_constructor = bool( + onearg_constructor and + re.match(r'((const\s+(volatile\s+)?)?|(volatile\s+(const\s+)?))?' 
+ rf'{re.escape(base_classname)}(\s*<[^>]*>)?(\s+const)?\s*(?:<\w+>\s*)?&', + constructor_args[0].strip()) + ) - Things we check for: spaces around operators, spaces after - if/for/while/switch, no spaces around parens in function calls, two - spaces between code and comment, don't start a block with a blank - line, don't end a function with a blank line, don't add a blank line - after public/protected/private, don't have too many blank lines in a row. + if (not is_marked_explicit and + onearg_constructor and + not initializer_list_constructor and + not copy_constructor): + if defaulted_args or variadic_args: + error(filename, linenum, 'runtime/explicit', 4, + 'Constructors callable with one argument ' + 'should be marked explicit.') + else: + error(filename, linenum, 'runtime/explicit', 4, + 'Single-parameter constructors should be marked explicit.') - Args: - filename: The name of the current file. - clean_lines: A CleansedLines instance containing the file. - linenum: The number of the line to check. - nesting_state: A NestingState instance which maintains information about - the current stack of nested blocks being parsed. - error: The function to call with any errors found. - """ - # Don't use "elided" lines here, otherwise we can't check commented lines. - # Don't want to use "raw" either, because we don't want to check inside C++11 - # raw strings, - raw = clean_lines.lines_without_raw_strings - line = raw[linenum] +def CheckSpacingForFunctionCall(filename, clean_lines, linenum, error): + """Checks for the correctness of various spacing around function calls. + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + error: The function to call with any errors found. 
+ """ + line = clean_lines.elided[linenum] + + # Since function calls often occur inside if/for/while/switch + # expressions - which have their own, more liberal conventions - we + # first see if we should be looking inside such an expression for a + # function call, to which we can apply more strict standards. + fncall = line # if there's no control flow construct, look at whole line + for pattern in (r'\bif\s*\((.*)\)\s*{', + r'\bfor\s*\((.*)\)\s*{', + r'\bwhile\s*\((.*)\)\s*[{;]', + r'\bswitch\s*\((.*)\)\s*{'): + match = re.search(pattern, line) + if match: + fncall = match.group(1) # look inside the parens for function calls + break + + # Except in if/for/while/switch, there should never be space + # immediately inside parens (eg "f( 3, 4 )"). We make an exception + # for nested parens ( (a+b) + c ). Likewise, there should never be + # a space before a ( when it's a function argument. I assume it's a + # function argument when the char before the whitespace is legal in + # a function name (alnum + _) and we're not starting a macro. Also ignore + # pointers and references to arrays and functions coz they're too tricky: + # we use a very simple way to recognize these: + # " (something)(maybe-something)" or + # " (something)(maybe-something," or + # " (something)[something]" + # Note that we assume the contents of [] to be short enough that + # they'll never need to wrap. + if ( # Ignore control structures. + not re.search(r'\b(if|elif|for|while|switch|return|new|delete|catch|sizeof)\b', + fncall) and + # Ignore pointers/references to functions. + not re.search(r' \([^)]+\)\([^)]*(\)|,$)', fncall) and + # Ignore pointers/references to arrays. 
+ not re.search(r' \([^)]+\)\[[^\]]+\]', fncall)): + if re.search(r'\w\s*\(\s(?!\s*\\$)', fncall): # a ( used for a fn call + error(filename, linenum, 'whitespace/parens', 4, + 'Extra space after ( in function call') + elif re.search(r'\(\s+(?!(\s*\\)|\()', fncall): + error(filename, linenum, 'whitespace/parens', 2, + 'Extra space after (') + if (re.search(r'\w\s+\(', fncall) and + not re.search(r'_{0,2}asm_{0,2}\s+_{0,2}volatile_{0,2}\s+\(', fncall) and + not re.search(r'#\s*define|typedef|using\s+\w+\s*=', fncall) and + not re.search(r'\w\s+\((\w+::)*\*\w+\)\(', fncall) and + not re.search(r'\bcase\s+\(', fncall)): + # TODO(unknown): Space after an operator function seem to be a common + # error, silence those for now by restricting them to highest verbosity. + if re.search(r'\boperator_*\b', line): + error(filename, linenum, 'whitespace/parens', 0, + 'Extra space before ( in function call') + else: + error(filename, linenum, 'whitespace/parens', 4, + 'Extra space before ( in function call') + # If the ) is followed only by a newline or a { + newline, assume it's + # part of a control statement (if/while/etc), and don't complain + if re.search(r'[^)]\s+\)\s*[^{\s]', fncall): + # If the closing parenthesis is preceded by only whitespaces, + # try to give a more descriptive error message. + if re.search(r'^\s+\)', fncall): + error(filename, linenum, 'whitespace/parens', 2, + 'Closing ) should be moved to the previous line') + else: + error(filename, linenum, 'whitespace/parens', 2, + 'Extra space before )') - # Before nixing comments, check if the line is blank for no good - # reason. This includes the first line after a block is opened, and - # blank lines at the end of a function (ie, right before a line like '}' - # - # Skip all the blank line checks if we are immediately inside a - # namespace body. 
In other words, don't issue blank line warnings - # for this block: - # namespace { - # - # } - # - # A warning about missing end of namespace comments will be issued instead. - # - # Also skip blank line checks for 'extern "C"' blocks, which are formatted - # like namespaces. - if ( - IsBlankLine(line) - and not nesting_state.InNamespaceBody() - and not nesting_state.InExternC() - ): - elided = clean_lines.elided - prev_line = elided[linenum - 1] - prevbrace = prev_line.rfind("{") - # TODO(unknown): Don't complain if line before blank line, and line after, - # both start with alnums and are indented the same amount. - # This ignores whitespace at the start of a namespace block - # because those are not usually indented. - if prevbrace != -1 and prev_line[prevbrace:].find("}") == -1: - # OK, we have a blank line at the start of a code block. Before we - # complain, we check if it is an exception to the rule: The previous - # non-empty line has the parameters of a function header that are indented - # 4 spaces (because they did not fit in a 80 column line when placed on - # the same line as the function name). We also check for the case where - # the previous line is indented 6 spaces, which may happen when the - # initializers of a constructor do not fit into a 80 column line. - exception = False - if Match(r" {6}\w", prev_line): # Initializer list? - # We are looking for the opening column of initializer list, which - # should be indented 4 spaces to cause 6 space indentation afterwards. - search_position = linenum - 2 - while search_position >= 0 and Match( - r" {6}\w", elided[search_position] - ): - search_position -= 1 - exception = ( - search_position >= 0 and elided[search_position][:5] == " :" - ) - else: - # Search for the function arguments or an initializer list. 
We use a - # simple heuristic here: If the line is indented 4 spaces; and we have a - # closing paren, without the opening paren, followed by an opening brace - # or colon (for initializer lists) we assume that it is the last line of - # a function header. If we have a colon indented 4 spaces, it is an - # initializer list. - exception = Match( - r" {4}\w[^\(]*\)\s*(const\s*)?(\{\s*$|:)", prev_line - ) or Match(r" {4}:", prev_line) - - if not exception: - error( - filename, - linenum, - "whitespace/blank_line", - 2, - "Redundant blank line at the start of a code block " - "should be deleted.", - ) - # Ignore blank lines at the end of a block in a long if-else - # chain, like this: - # if (condition1) { - # // Something followed by a blank line - # - # } else if (condition2) { - # // Something else - # } - if linenum + 1 < clean_lines.NumLines(): - next_line = raw[linenum + 1] - if ( - next_line - and Match(r"\s*}", next_line) - and next_line.find("} else ") == -1 - ): - error( - filename, - linenum, - "whitespace/blank_line", - 3, - "Redundant blank line at the end of a code block " - "should be deleted.", - ) - - matched = Match(r"\s*(public|protected|private):", prev_line) - if matched: - error( - filename, - linenum, - "whitespace/blank_line", - 3, - 'Do not leave a blank line after "%s:"' % matched.group(1), - ) - - # Next, check comments - next_line_start = 0 - if linenum + 1 < clean_lines.NumLines(): - next_line = raw[linenum + 1] - next_line_start = len(next_line) - len(next_line.lstrip()) - CheckComment(line, filename, linenum, next_line_start, error) - # get rid of comments and strings - line = clean_lines.elided[linenum] +def IsBlankLine(line): + """Returns true if the given line is blank. + + We consider a line to be blank if the line is empty or consists of + only white spaces. + + Args: + line: A line of a string. + + Returns: + True, if the given line is blank. 
+ """ + return not line or line.isspace() + + +def CheckForNamespaceIndentation(filename, nesting_state, clean_lines, line, + error): + is_namespace_indent_item = ( + len(nesting_state.stack) >= 1 and + (isinstance(nesting_state.stack[-1], _NamespaceInfo) or + (isinstance(nesting_state.previous_stack_top, _NamespaceInfo))) + ) + + if ShouldCheckNamespaceIndentation(nesting_state, is_namespace_indent_item, + clean_lines.elided, line): + CheckItemIndentationInNamespace(filename, clean_lines.elided, + line, error) + + +def CheckForFunctionLengths(filename, clean_lines, linenum, + function_state, error): + """Reports for long function bodies. + + For an overview why this is done, see: + https://google-styleguide.googlecode.com/svn/trunk/cppguide.xml#Write_Short_Functions + + Uses a simplistic algorithm assuming other style guidelines + (especially spacing) are followed. + Only checks unindented functions, so class members are unchecked. + Trivial bodies are unchecked, so constructors with huge initializer lists + may be missed. + Blank/comment lines are not counted so as to avoid encouraging the removal + of vertical space and comments just to get through a lint check. + NOLINT *on the last line of a function* disables this check. + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + function_state: Current function name and lines in body so far. + error: The function to call with any errors found. + """ + lines = clean_lines.lines + line = lines[linenum] + joined_line = '' + + starting_func = False + regexp = r'(\w(\w|::|\*|\&|\s)*)\(' # decls * & space::name( ... + match_result = re.match(regexp, line) + if match_result: + # If the name is all caps and underscores, figure it's a macro and + # ignore it, unless it's TEST or TEST_F. 
+ function_name = match_result.group(1).split()[-1] + if function_name == 'TEST' or function_name == 'TEST_F' or ( + not re.match(r'[A-Z_]+$', function_name)): + starting_func = True + + if starting_func: + body_found = False + for start_linenum in range(linenum, clean_lines.NumLines()): + start_line = lines[start_linenum] + joined_line += ' ' + start_line.lstrip() + if re.search(r'(;|})', start_line): # Declarations and trivial functions + body_found = True + break # ... ignore + if re.search(r'{', start_line): + body_found = True + function = re.search(r'((\w|:)*)\(', line).group(1) + if re.match(r'TEST', function): # Handle TEST... macros + parameter_regexp = re.search(r'(\(.*\))', joined_line) + if parameter_regexp: # Ignore bad syntax + function += parameter_regexp.group(1) + else: + function += '()' + function_state.Begin(function) + break + if not body_found: + # No body for the function (or evidence of a non-function) was found. + error(filename, linenum, 'readability/fn_size', 5, + 'Lint failed to find start of function body.') + elif re.match(r'^\}\s*$', line): # function end + function_state.Check(error, filename, linenum) + function_state.End() + elif not re.match(r'^\s*$', line): + function_state.Count() # Count non-blank/non-comment lines. - # You shouldn't have spaces before your brackets, except maybe after - # 'delete []', 'return []() {};', or 'auto [abc, ...] = ...;'. - if Search(r"\w\s+\[", line) and not Search(r"(?:auto&?|delete|return)\s+\[", line): - error(filename, linenum, "whitespace/braces", 5, "Extra space before [") - # In range-based for, we wanted spaces before and after the colon, but - # not around "::" tokens that might appear. 
- if Search(r"for *\(.*[^:]:[^: ]", line) or Search(r"for *\(.*[^: ]:[^:]", line): - error( - filename, - linenum, - "whitespace/forcolon", - 2, - "Missing space around colon in range-based for loop", - ) +_RE_PATTERN_TODO = re.compile(r'^//(\s*)TODO(\(.+?\))?:?(\s|$)?') -def CheckOperatorSpacing(filename, clean_lines, linenum, error): - """Checks for horizontal spacing around operators. +def CheckComment(line, filename, linenum, next_line_start, error): + """Checks for common mistakes in comments. + + Args: + line: The line in question. + filename: The name of the current file. + linenum: The number of the line to check. + next_line_start: The first non-whitespace column of the next line. + error: The function to call with any errors found. + """ + commentpos = line.find('//') + if commentpos != -1: + # Check if the // may be in quotes. If so, ignore it + if re.sub(r'\\.', '', line[0:commentpos]).count('"') % 2 == 0: + # Allow one space for new scopes, two spaces otherwise: + if (not (re.match(r'^.*{ *//', line) and next_line_start == commentpos) and + ((commentpos >= 1 and + line[commentpos-1] not in string.whitespace) or + (commentpos >= 2 and + line[commentpos-2] not in string.whitespace))): + error(filename, linenum, 'whitespace/comments', 2, + 'At least two spaces is best between code and comments') + + # Checks for common mistakes in TODO comments. + comment = line[commentpos:] + match = _RE_PATTERN_TODO.match(comment) + if match: + # One whitespace is correct; zero whitespace is handled elsewhere. 
+ leading_whitespace = match.group(1) + if len(leading_whitespace) > 1: + error(filename, linenum, 'whitespace/todo', 2, + 'Too many spaces before TODO') + + username = match.group(2) + if not username: + error(filename, linenum, 'readability/todo', 2, + 'Missing username in TODO; it should look like ' + '"// TODO(my_username): Stuff."') + + middle_whitespace = match.group(3) + # Comparisons made explicit for correctness + # -- pylint: disable=g-explicit-bool-comparison + if middle_whitespace != ' ' and middle_whitespace != '': + error(filename, linenum, 'whitespace/todo', 2, + 'TODO(my_username) should be followed by a space') + + # If the comment contains an alphanumeric character, there + # should be a space somewhere between it and the // unless + # it's a /// or //! Doxygen comment. + if (re.match(r'//[^ ]*\w', comment) and + not re.match(r'(///|//\!)(\s+|$)', comment)): + error(filename, linenum, 'whitespace/comments', 4, + 'Should have a space between // and comment') - Args: - filename: The name of the current file. - clean_lines: A CleansedLines instance containing the file. - linenum: The number of the line to check. - error: The function to call with any errors found. - """ - line = clean_lines.elided[linenum] - # Don't try to do spacing checks for operator methods. Do this by - # replacing the troublesome characters with something else, - # preserving column position for all other characters. +def CheckSpacing(filename, clean_lines, linenum, nesting_state, error): + """Checks for the correctness of various spacing issues in the code. + + Things we check for: spaces around operators, spaces after + if/for/while/switch, no spaces around parens in function calls, two + spaces between code and comment, don't start a block with a blank + line, don't end a function with a blank line, don't add a blank line + after public/protected/private, don't have too many blank lines in a row. + + Args: + filename: The name of the current file. 
+ clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + nesting_state: A NestingState instance which maintains information about + the current stack of nested blocks being parsed. + error: The function to call with any errors found. + """ + + # Don't use "elided" lines here, otherwise we can't check commented lines. + # Don't want to use "raw" either, because we don't want to check inside C++11 + # raw strings, + raw = clean_lines.lines_without_raw_strings + line = raw[linenum] + + # Before nixing comments, check if the line is blank for no good + # reason. This includes the first line after a block is opened, and + # blank lines at the end of a function (ie, right before a line like '}' + # + # Skip all the blank line checks if we are immediately inside a + # namespace body. In other words, don't issue blank line warnings + # for this block: + # namespace { + # + # } + # + # A warning about missing end of namespace comments will be issued instead. + # + # Also skip blank line checks for 'extern "C"' blocks, which are formatted + # like namespaces. + if (IsBlankLine(line) and + not nesting_state.InNamespaceBody() and + not nesting_state.InExternC()): + elided = clean_lines.elided + prev_line = elided[linenum - 1] + prevbrace = prev_line.rfind('{') + # TODO(unknown): Don't complain if line before blank line, and line after, + # both start with alnums and are indented the same amount. + # This ignores whitespace at the start of a namespace block + # because those are not usually indented. + if prevbrace != -1 and prev_line[prevbrace:].find('}') == -1: + # OK, we have a blank line at the start of a code block. Before we + # complain, we check if it is an exception to the rule: The previous + # non-empty line has the parameters of a function header that are indented + # 4 spaces (because they did not fit in a 80 column line when placed on + # the same line as the function name). 
We also check for the case where + # the previous line is indented 6 spaces, which may happen when the + # initializers of a constructor do not fit into a 80 column line. + exception = False + if re.match(r' {6}\w', prev_line): # Initializer list? + # We are looking for the opening column of initializer list, which + # should be indented 4 spaces to cause 6 space indentation afterwards. + search_position = linenum-2 + while (search_position >= 0 + and re.match(r' {6}\w', elided[search_position])): + search_position -= 1 + exception = (search_position >= 0 + and elided[search_position][:5] == ' :') + else: + # Search for the function arguments or an initializer list. We use a + # simple heuristic here: If the line is indented 4 spaces; and we have a + # closing paren, without the opening paren, followed by an opening brace + # or colon (for initializer lists) we assume that it is the last line of + # a function header. If we have a colon indented 4 spaces, it is an + # initializer list. + exception = (re.match(r' {4}\w[^\(]*\)\s*(const\s*)?(\{\s*$|:)', + prev_line) + or re.match(r' {4}:', prev_line)) + + if not exception: + error(filename, linenum, 'whitespace/blank_line', 2, + 'Redundant blank line at the start of a code block ' + 'should be deleted.') + # Ignore blank lines at the end of a block in a long if-else + # chain, like this: + # if (condition1) { + # // Something followed by a blank line # - # The replacement is done repeatedly to avoid false positives from - # operators that call operators. 
- while True: - match = Match(r"^(.*\boperator\b)(\S+)(\s*\(.*)$", line) - if match: - line = match.group(1) + ("_" * len(match.group(2))) + match.group(3) - else: - break + # } else if (condition2) { + # // Something else + # } + if linenum + 1 < clean_lines.NumLines(): + next_line = raw[linenum + 1] + if (next_line + and re.match(r'\s*}', next_line) + and next_line.find('} else ') == -1): + error(filename, linenum, 'whitespace/blank_line', 3, + 'Redundant blank line at the end of a code block ' + 'should be deleted.') + + matched = re.match(r'\s*(public|protected|private):', prev_line) + if matched: + error(filename, linenum, 'whitespace/blank_line', 3, + f'Do not leave a blank line after "{matched.group(1)}:"') + + # Next, check comments + next_line_start = 0 + if linenum + 1 < clean_lines.NumLines(): + next_line = raw[linenum + 1] + next_line_start = len(next_line) - len(next_line.lstrip()) + CheckComment(line, filename, linenum, next_line_start, error) + + # get rid of comments and strings + line = clean_lines.elided[linenum] + + # You shouldn't have spaces before your brackets, except for C++11 attributes + # or maybe after 'delete []', 'return []() {};', or 'auto [abc, ...] = ...;'. + if (re.search(r'\w\s+\[(?!\[)', line) and + not re.search(r'(?:auto&?|delete|return)\s+\[', line)): + error(filename, linenum, 'whitespace/braces', 5, + 'Extra space before [') + + # In range-based for, we wanted spaces before and after the colon, but + # not around "::" tokens that might appear. + if (re.search(r'for *\(.*[^:]:[^: ]', line) or + re.search(r'for *\(.*[^: ]:[^:]', line)): + error(filename, linenum, 'whitespace/forcolon', 2, + 'Missing space around colon in range-based for loop') - # We allow no-spaces around = within an if: "if ( (a=Foo()) == 0 )". - # Otherwise not. Note we only check for non-spaces on *both* sides; - # sometimes people put non-spaces on one side when aligning ='s among - # many lines (not that this is behavior that I approve of...) 
- if ( - (Search(r"[\w.]=", line) or Search(r"=[\w.]", line)) - and not Search(r"\b(if|while|for) ", line) - # Operators taken from [lex.operators] in C++11 standard. - and not Search(r"(>=|<=|==|!=|&=|\^=|\|=|\+=|\*=|\/=|\%=)", line) - and not Search(r"operator=", line) - ): - error(filename, linenum, "whitespace/operators", 4, "Missing spaces around =") - - # It's ok not to have spaces around binary operators like + - * /, but if - # there's too little whitespace, we get concerned. It's hard to tell, - # though, so we punt on this one for now. TODO. - - # You should always have whitespace around binary operators. - # - # Check <= and >= first to avoid false positives with < and >, then - # check non-include lines for spacing around < and >. - # - # If the operator is followed by a comma, assume it's be used in a - # macro context and don't do any checks. This avoids false - # positives. - # - # Note that && is not included here. This is because there are too - # many false positives due to RValue references. - match = Search(r"[^<>=!\s](==|!=|<=|>=|\|\|)[^<>=!\s,;\)]", line) + +def CheckOperatorSpacing(filename, clean_lines, linenum, error): + """Checks for horizontal spacing around operators. + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + error: The function to call with any errors found. + """ + line = clean_lines.elided[linenum] + + # Don't try to do spacing checks for operator methods. Do this by + # replacing the troublesome characters with something else, + # preserving column position for all other characters. + # + # The replacement is done repeatedly to avoid false positives from + # operators that call operators. 
+ while True: + match = re.match(r'^(.*\boperator\b)(\S+)(\s*\(.*)$', line) if match: - error( - filename, - linenum, - "whitespace/operators", - 3, - "Missing spaces around %s" % match.group(1), - ) - elif not Match(r"#.*include", line): - # Look for < that is not surrounded by spaces. This is only - # triggered if both sides are missing spaces, even though - # technically should should flag if at least one side is missing a - # space. This is done to avoid some false positives with shifts. - match = Match(r"^(.*[^\s<])<[^\s=<,]", line) - if match: - (_, _, end_pos) = CloseExpression(clean_lines, linenum, len(match.group(1))) - if end_pos <= -1: - error( - filename, - linenum, - "whitespace/operators", - 3, - "Missing spaces around <", - ) - - # Look for > that is not surrounded by spaces. Similar to the - # above, we only trigger if both sides are missing spaces to avoid - # false positives with shifts. - match = Match(r"^(.*[^-\s>])>[^\s=>,]", line) - if match: - (_, _, start_pos) = ReverseCloseExpression( - clean_lines, linenum, len(match.group(1)) - ) - if start_pos <= -1: - error( - filename, - linenum, - "whitespace/operators", - 3, - "Missing spaces around >", - ) - - # We allow no-spaces around << when used like this: 10<<20, but - # not otherwise (particularly, not when used as streams) - # - # We also allow operators following an opening parenthesis, since - # those tend to be macros that deal with operators. - match = Search(r"(operator|[^\s(<])(?:L|UL|LL|ULL|l|ul|ll|ull)?<<([^\s,=<])", line) - if ( - match - and not (match.group(1).isdigit() and match.group(2).isdigit()) - and not (match.group(1) == "operator" and match.group(2) == ";") - ): - error(filename, linenum, "whitespace/operators", 3, "Missing spaces around <<") - - # We allow no-spaces around >> for almost anything. This is because - # C++11 allows ">>" to close nested templates, which accounts for - # most cases when ">>" is not followed by a space. 
- # - # We still warn on ">>" followed by alpha character, because that is - # likely due to ">>" being used for right shifts, e.g.: - # value >> alpha - # - # When ">>" is used to close templates, the alphanumeric letter that - # follows would be part of an identifier, and there should still be - # a space separating the template type and the identifier. - # type> alpha - match = Search(r">>[a-zA-Z_]", line) + line = match.group(1) + ('_' * len(match.group(2))) + match.group(3) + else: + break + + # We allow no-spaces around = within an if: "if ( (a=Foo()) == 0 )". + # Otherwise not. Note we only check for non-spaces on *both* sides; + # sometimes people put non-spaces on one side when aligning ='s among + # many lines (not that this is behavior that I approve of...) + if ((re.search(r'[\w.]=', line) or + re.search(r'=[\w.]', line)) + and not re.search(r'\b(if|while|for) ', line) + # Operators taken from [lex.operators] in C++11 standard. + and not re.search(r'(>=|<=|==|!=|&=|\^=|\|=|\+=|\*=|\/=|\%=)', line) + and not re.search(r'operator=', line)): + error(filename, linenum, 'whitespace/operators', 4, + 'Missing spaces around =') + + # It's ok not to have spaces around binary operators like + - * /, but if + # there's too little whitespace, we get concerned. It's hard to tell, + # though, so we punt on this one for now. TODO. + + # You should always have whitespace around binary operators. + # + # Check <= and >= first to avoid false positives with < and >, then + # check non-include lines for spacing around < and >. + # + # If the operator is followed by a comma, assume it's be used in a + # macro context and don't do any checks. This avoids false + # positives. + # + # Note that && is not included here. This is because there are too + # many false positives due to RValue references. 
+ match = re.search(r'[^<>=!\s](==|!=|<=|>=|\|\|)[^<>=!\s,;\)]', line) + if match: + # TODO: support alternate operators + error(filename, linenum, 'whitespace/operators', 3, + f'Missing spaces around {match.group(1)}') + elif not re.match(r'#.*include', line): + # Look for < that is not surrounded by spaces. This is only + # triggered if both sides are missing spaces, even though + # technically should should flag if at least one side is missing a + # space. This is done to avoid some false positives with shifts. + match = re.match(r'^(.*[^\s<])<[^\s=<,]', line) if match: - error(filename, linenum, "whitespace/operators", 3, "Missing spaces around >>") - - # There shouldn't be space around unary operators - match = Search(r"(!\s|~\s|[\s]--[\s;]|[\s]\+\+[\s;])", line) + (_, _, end_pos) = CloseExpression( + clean_lines, linenum, len(match.group(1))) + if end_pos <= -1: + error(filename, linenum, 'whitespace/operators', 3, + 'Missing spaces around <') + + # Look for > that is not surrounded by spaces. Similar to the + # above, we only trigger if both sides are missing spaces to avoid + # false positives with shifts. + match = re.match(r'^(.*[^-\s>])>[^\s=>,]', line) if match: - error( - filename, - linenum, - "whitespace/operators", - 4, - "Extra space for operator %s" % match.group(1), - ) + (_, _, start_pos) = ReverseCloseExpression( + clean_lines, linenum, len(match.group(1))) + if start_pos <= -1: + error(filename, linenum, 'whitespace/operators', 3, + 'Missing spaces around >') + + # We allow no-spaces around << when used like this: 10<<20, but + # not otherwise (particularly, not when used as streams) + # + # We also allow operators following an opening parenthesis, since + # those tend to be macros that deal with operators. 
+ match = re.search(r'(operator|[^\s(<])(?:L|UL|LL|ULL|l|ul|ll|ull)?<<([^\s,=<])', line) + if (match and not (match.group(1).isdigit() and match.group(2).isdigit()) and + not (match.group(1) == 'operator' and match.group(2) == ';')): + error(filename, linenum, 'whitespace/operators', 3, + 'Missing spaces around <<') + + # We allow no-spaces around >> for almost anything. This is because + # C++11 allows ">>" to close nested templates, which accounts for + # most cases when ">>" is not followed by a space. + # + # We still warn on ">>" followed by alpha character, because that is + # likely due to ">>" being used for right shifts, e.g.: + # value >> alpha + # + # When ">>" is used to close templates, the alphanumeric letter that + # follows would be part of an identifier, and there should still be + # a space separating the template type and the identifier. + # type> alpha + match = re.search(r'>>[a-zA-Z_]', line) + if match: + error(filename, linenum, 'whitespace/operators', 3, + 'Missing spaces around >>') + + # There shouldn't be space around unary operators + match = re.search(r'(!\s|~\s|[\s]--[\s;]|[\s]\+\+[\s;])', line) + if match: + error(filename, linenum, 'whitespace/operators', 4, + f'Extra space for operator {match.group(1)}') def CheckParenthesisSpacing(filename, clean_lines, linenum, error): - """Checks for horizontal spacing around parentheses. + """Checks for horizontal spacing around parentheses. + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + error: The function to call with any errors found. 
+ """ + line = clean_lines.elided[linenum] + + # No spaces after an if, while, switch, or for + match = re.search(r' (if\(|for\(|while\(|switch\()', line) + if match: + error(filename, linenum, 'whitespace/parens', 5, + f'Missing space before ( in {match.group(1)}') + + # For if/for/while/switch, the left and right parens should be + # consistent about how many spaces are inside the parens, and + # there should either be zero or one spaces inside the parens. + # We don't want: "if ( foo)" or "if ( foo )". + # Exception: "for ( ; foo; bar)" and "for (foo; bar; )" are allowed. + match = re.search(r'\b(if|for|while|switch)\s*' + r'\(([ ]*)(.).*[^ ]+([ ]*)\)\s*{\s*$', + line) + if match: + if len(match.group(2)) != len(match.group(4)): + if not (match.group(3) == ';' and + len(match.group(2)) == 1 + len(match.group(4)) or + not match.group(2) and re.search(r'\bfor\s*\(.*; \)', line)): + error(filename, linenum, 'whitespace/parens', 5, + f'Mismatching spaces inside () in {match.group(1)}') + if len(match.group(2)) not in [0, 1]: + error(filename, linenum, 'whitespace/parens', 5, + f'Should have zero or one spaces inside ( and ) in {match.group(1)}') - Args: - filename: The name of the current file. - clean_lines: A CleansedLines instance containing the file. - linenum: The number of the line to check. - error: The function to call with any errors found. - """ - line = clean_lines.elided[linenum] - # No spaces after an if, while, switch, or for - match = Search(r" (if\(|for\(|while\(|switch\()", line) - if match: - error( - filename, - linenum, - "whitespace/parens", - 5, - "Missing space before ( in %s" % match.group(1), - ) - - # For if/for/while/switch, the left and right parens should be - # consistent about how many spaces are inside the parens, and - # there should either be zero or one spaces inside the parens. - # We don't want: "if ( foo)" or "if ( foo )". - # Exception: "for ( ; foo; bar)" and "for (foo; bar; )" are allowed. 
- match = Search( - r"\b(if|for|while|switch)\s*" r"\(([ ]*)(.).*[^ ]+([ ]*)\)\s*{\s*$", line - ) - if match: - if len(match.group(2)) != len(match.group(4)): - if not ( - match.group(3) == ";" - and len(match.group(2)) == 1 + len(match.group(4)) - or not match.group(2) - and Search(r"\bfor\s*\(.*; \)", line) - ): - error( - filename, - linenum, - "whitespace/parens", - 5, - "Mismatching spaces inside () in %s" % match.group(1), - ) - if len(match.group(2)) not in [0, 1]: - error( - filename, - linenum, - "whitespace/parens", - 5, - "Should have zero or one spaces inside ( and ) in %s" % match.group(1), - ) +def CheckCommaSpacing(filename, clean_lines, linenum, error): + """Checks for horizontal spacing near commas and semicolons. + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + error: The function to call with any errors found. + """ + raw = clean_lines.lines_without_raw_strings + line = clean_lines.elided[linenum] + + # You should always have a space after a comma (either as fn arg or operator) + # + # This does not apply when the non-space character following the + # comma is another comma, since the only time when that happens is + # for empty macro arguments. + # + # We run this check in two passes: first pass on elided lines to + # verify that lines contain missing whitespaces, second pass on raw + # lines to confirm that those missing whitespaces are not due to + # elided comments. 
+ match = re.search(r',[^,\s]', re.sub(r'\b__VA_OPT__\s*\(,\)', '', + re.sub(r'\boperator\s*,\s*\(', 'F(', line))) + if (match and re.search(r',[^,\s]', raw[linenum])): + error(filename, linenum, 'whitespace/comma', 3, + 'Missing space after ,') + + # You should always have a space after a semicolon + # except for few corner cases + # TODO(unknown): clarify if 'if (1) { return 1;}' is requires one more + # space after ; + if re.search(r';[^\s};\\)/]', line): + error(filename, linenum, 'whitespace/semicolon', 3, + 'Missing space after ;') -def CheckCommaSpacing(filename, clean_lines, linenum, error): - """Checks for horizontal spacing near commas and semicolons. +def _IsType(clean_lines, nesting_state, expr): + """Check if expression looks like a type name, returns true if so. + + Args: + clean_lines: A CleansedLines instance containing the file. + nesting_state: A NestingState instance which maintains information about + the current stack of nested blocks being parsed. + expr: The expression to check. + Returns: + True, if token looks like a type. + """ + # Keep only the last token in the expression + last_word = re.match(r'^.*(\b\S+)$', expr) + if last_word: + token = last_word.group(1) + else: + token = expr + + # Match native types and stdint types + if _TYPES.match(token): + return True - Args: - filename: The name of the current file. - clean_lines: A CleansedLines instance containing the file. - linenum: The number of the line to check. - error: The function to call with any errors found. - """ - raw = clean_lines.lines_without_raw_strings - line = clean_lines.elided[linenum] + # Try a bit harder to match templated types. Walk up the nesting + # stack until we find something that resembles a typename + # declaration for what we are looking for. 
+ typename_pattern = (r'\b(?:typename|class|struct)\s+' + re.escape(token) + + r'\b') + block_index = len(nesting_state.stack) - 1 + while block_index >= 0: + if isinstance(nesting_state.stack[block_index], _NamespaceInfo): + return False + + # Found where the opening brace is. We want to scan from this + # line up to the beginning of the function, minus a few lines. + # template + # class C + # : public ... { // start scanning here + last_line = nesting_state.stack[block_index].starting_linenum + + next_block_start = 0 + if block_index > 0: + next_block_start = nesting_state.stack[block_index - 1].starting_linenum + first_line = last_line + while first_line >= next_block_start: + if clean_lines.elided[first_line].find('template') >= 0: + break + first_line -= 1 + if first_line < next_block_start: + # Didn't find any "template" keyword before reaching the next block, + # there are probably no template things to check for this block + block_index -= 1 + continue + + # Look for typename in the specified range + for i in range(first_line, last_line + 1, 1): + if re.search(typename_pattern, clean_lines.elided[i]): + return True + block_index -= 1 + + return False - # You should always have a space after a comma (either as fn arg or operator) + +def CheckBracesSpacing(filename, clean_lines, linenum, nesting_state, error): + """Checks for horizontal spacing near commas. + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + nesting_state: A NestingState instance which maintains information about + the current stack of nested blocks being parsed. + error: The function to call with any errors found. + """ + line = clean_lines.elided[linenum] + + # Except after an opening paren, or after another opening brace (in case of + # an initializer list, for instance), you should have spaces before your + # braces when they are delimiting blocks, classes, namespaces etc. 
+ # And since you should never have braces at the beginning of a line, + # this is an easy test. Except that braces used for initialization don't + # follow the same rule; we often don't want spaces before those. + match = re.match(r'^(.*[^ ({>]){', line) + + if match: + # Try a bit harder to check for brace initialization. This + # happens in one of the following forms: + # Constructor() : initializer_list_{} { ... } + # Constructor{}.MemberFunction() + # Type variable{}; + # FunctionCall(type{}, ...); + # LastArgument(..., type{}); + # LOG(INFO) << type{} << " ..."; + # map_of_type[{...}] = ...; + # ternary = expr ? new type{} : nullptr; + # OuterTemplate{}> # - # This does not apply when the non-space character following the - # comma is another comma, since the only time when that happens is - # for empty macro arguments. + # We check for the character following the closing brace, and + # silence the warning if it's one of those listed above, i.e. + # "{.;,)<>]:". # - # We run this check in two passes: first pass on elided lines to - # verify that lines contain missing whitespaces, second pass on raw - # lines to confirm that those missing whitespaces are not due to - # elided comments. - if Search(r",[^,\s]", ReplaceAll(r"\boperator\s*,\s*\(", "F(", line)) and Search( - r",[^,\s]", raw[linenum] - ): - error(filename, linenum, "whitespace/comma", 3, "Missing space after ,") - - # You should always have a space after a semicolon - # except for few corner cases - # TODO(unknown): clarify if 'if (1) { return 1;}' is requires one more - # space after ; - if Search(r";[^\s};\\)/]", line): - error(filename, linenum, "whitespace/semicolon", 3, "Missing space after ;") + # To account for nested initializer list, we allow any number of + # closing braces up to "{;,)<". We can't simply silence the + # warning on first sight of closing brace, because that would + # cause false negatives for things that are not initializer lists. 
+ # Silence this: But not this: + # Outer{ if (...) { + # Inner{...} if (...){ // Missing space before { + # }; } + # + # There is a false negative with this approach if people inserted + # spurious semicolons, e.g. "if (cond){};", but we will catch the + # spurious semicolon with a separate check. + leading_text = match.group(1) + (endline, endlinenum, endpos) = CloseExpression( + clean_lines, linenum, len(match.group(1))) + trailing_text = '' + if endpos > -1: + trailing_text = endline[endpos:] + for offset in range(endlinenum + 1, + min(endlinenum + 3, clean_lines.NumLines() - 1)): + trailing_text += clean_lines.elided[offset] + # We also suppress warnings for `uint64_t{expression}` etc., as the style + # guide recommends brace initialization for integral types to avoid + # overflow/truncation. + if (not re.match(r'^[\s}]*[{.;,)<>\]:]', trailing_text) + and not _IsType(clean_lines, nesting_state, leading_text)): + error(filename, linenum, 'whitespace/braces', 5, + 'Missing space before {') + + # Make sure '} else {' has spaces. + if re.search(r'}else', line): + error(filename, linenum, 'whitespace/braces', 5, + 'Missing space before else') + + # You shouldn't have a space before a semicolon at the end of the line. + # There's a special case for "for" since the style guide allows space before + # the semicolon there. + if re.search(r':\s*;\s*$', line): + error(filename, linenum, 'whitespace/semicolon', 5, + 'Semicolon defining empty statement. Use {} instead.') + elif re.search(r'^\s*;\s*$', line): + error(filename, linenum, 'whitespace/semicolon', 5, + 'Line contains only semicolon. If this should be an empty statement, ' + 'use {} instead.') + elif (re.search(r'\s+;\s*$', line) and + not re.search(r'\bfor\b', line)): + error(filename, linenum, 'whitespace/semicolon', 5, + 'Extra space before last semicolon. 
If this should be an empty ' + 'statement, use {} instead.') -def _IsType(clean_lines, nesting_state, expr): - """Check if expression looks like a type name, returns true if so. +def IsDecltype(clean_lines, linenum, column): + """Check if the token ending on (linenum, column) is decltype(). + + Args: + clean_lines: A CleansedLines instance containing the file. + linenum: the number of the line to check. + column: end column of the token to check. + Returns: + True if this token is decltype() expression, False otherwise. + """ + (text, _, start_col) = ReverseCloseExpression(clean_lines, linenum, column) + if start_col < 0: + return False + if re.search(r'\bdecltype\s*$', text[0:start_col]): + return True + return False - Args: - clean_lines: A CleansedLines instance containing the file. - nesting_state: A NestingState instance which maintains information about - the current stack of nested blocks being parsed. - expr: The expression to check. - Returns: - True, if token looks like a type. - """ - # Keep only the last token in the expression - last_word = Match(r"^.*(\b\S+)$", expr) - if last_word: - token = last_word.group(1) - else: - token = expr +def CheckSectionSpacing(filename, clean_lines, class_info, linenum, error): + """Checks for additional blank line issues related to sections. + + Currently the only thing checked here is blank line before protected/private. + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + class_info: A _ClassInfo objects. + linenum: The number of the line to check. + error: The function to call with any errors found. + """ + # Skip checks if the class is small, where small means 25 lines or less. + # 25 lines seems like a good cutoff since that's the usual height of + # terminals, and any class that can't fit in one screen can't really + # be considered "small". + # + # Also skip checks if we are on the first line. 
This accounts for + # classes that look like + # class Foo { public: ... }; + # + # If we didn't find the end of the class, last_line would be zero, + # and the check will be skipped by the first condition. + if (class_info.last_line - class_info.starting_linenum <= 24 or + linenum <= class_info.starting_linenum): + return + + matched = re.match(r'\s*(public|protected|private):', clean_lines.lines[linenum]) + if matched: + # Issue warning if the line before public/protected/private was + # not a blank line, but don't do this if the previous line contains + # "class" or "struct". This can happen two ways: + # - We are at the beginning of the class. + # - We are forward-declaring an inner class that is semantically + # private, but needed to be public for implementation reasons. + # Also ignores cases where the previous line ends with a backslash as can be + # common when defining classes in C macros. + prev_line = clean_lines.lines[linenum - 1] + if (not IsBlankLine(prev_line) and + not re.search(r'\b(class|struct)\b', prev_line) and + not re.search(r'\\$', prev_line)): + # Try a bit harder to find the beginning of the class. This is to + # account for multi-line base-specifier lists, e.g.: + # class Derived + # : public Base { + end_class_head = class_info.starting_linenum + for i in range(class_info.starting_linenum, linenum): + if re.search(r'\{\s*$', clean_lines.lines[i]): + end_class_head = i + break + if end_class_head < linenum - 1: + error(filename, linenum, 'whitespace/blank_line', 3, + f'"{matched.group(1)}:" should be preceded by a blank line') - # Match native types and stdint types - if _TYPES.match(token): - return True - # Try a bit harder to match templated types. Walk up the nesting - # stack until we find something that resembles a typename - # declaration for what we are looking for. 
- typename_pattern = r"\b(?:typename|class|struct)\s+" + re.escape(token) + r"\b" - block_index = len(nesting_state.stack) - 1 - while block_index >= 0: - if isinstance(nesting_state.stack[block_index], _NamespaceInfo): - return False - - # Found where the opening brace is. We want to scan from this - # line up to the beginning of the function, minus a few lines. - # template - # class C - # : public ... { // start scanning here - last_line = nesting_state.stack[block_index].starting_linenum - - next_block_start = 0 - if block_index > 0: - next_block_start = nesting_state.stack[block_index - 1].starting_linenum - first_line = last_line - while first_line >= next_block_start: - if clean_lines.elided[first_line].find("template") >= 0: - break - first_line -= 1 - if first_line < next_block_start: - # Didn't find any "template" keyword before reaching the next block, - # there are probably no template things to check for this block - block_index -= 1 - continue +def GetPreviousNonBlankLine(clean_lines, linenum): + """Return the most recent non-blank line and its line number. - # Look for typename in the specified range - for i in xrange(first_line, last_line + 1, 1): - if Search(typename_pattern, clean_lines.elided[i]): - return True - block_index -= 1 + Args: + clean_lines: A CleansedLines instance containing the file contents. + linenum: The number of the line to check. - return False + Returns: + A tuple with two elements. The first element is the contents of the last + non-blank line before the current line, or the empty string if this is the + first non-blank line. The second is the line number of that line, or -1 + if this is the first non-blank line. + """ - -def CheckBracesSpacing(filename, clean_lines, linenum, nesting_state, error): - """Checks for horizontal spacing near commas. - - Args: - filename: The name of the current file. - clean_lines: A CleansedLines instance containing the file. - linenum: The number of the line to check. 
- nesting_state: A NestingState instance which maintains information about - the current stack of nested blocks being parsed. - error: The function to call with any errors found. - """ - line = clean_lines.elided[linenum] - - # Except after an opening paren, or after another opening brace (in case of - # an initializer list, for instance), you should have spaces before your - # braces when they are delimiting blocks, classes, namespaces etc. - # And since you should never have braces at the beginning of a line, - # this is an easy test. Except that braces used for initialization don't - # follow the same rule; we often don't want spaces before those. - match = Match(r"^(.*[^ ({>]){", line) - - if match: - # Try a bit harder to check for brace initialization. This - # happens in one of the following forms: - # Constructor() : initializer_list_{} { ... } - # Constructor{}.MemberFunction() - # Type variable{}; - # FunctionCall(type{}, ...); - # LastArgument(..., type{}); - # LOG(INFO) << type{} << " ..."; - # map_of_type[{...}] = ...; - # ternary = expr ? new type{} : nullptr; - # OuterTemplate{}> - # - # We check for the character following the closing brace, and - # silence the warning if it's one of those listed above, i.e. - # "{.;,)<>]:". - # - # To account for nested initializer list, we allow any number of - # closing braces up to "{;,)<". We can't simply silence the - # warning on first sight of closing brace, because that would - # cause false negatives for things that are not initializer lists. - # Silence this: But not this: - # Outer{ if (...) { - # Inner{...} if (...){ // Missing space before { - # }; } - # - # There is a false negative with this approach if people inserted - # spurious semicolons, e.g. "if (cond){};", but we will catch the - # spurious semicolon with a separate check. 
- leading_text = match.group(1) - (endline, endlinenum, endpos) = CloseExpression( - clean_lines, linenum, len(match.group(1)) - ) - trailing_text = "" - if endpos > -1: - trailing_text = endline[endpos:] - for offset in xrange( - endlinenum + 1, min(endlinenum + 3, clean_lines.NumLines() - 1) - ): - trailing_text += clean_lines.elided[offset] - # We also suppress warnings for `uint64_t{expression}` etc., as the style - # guide recommends brace initialization for integral types to avoid - # overflow/truncation. - if not Match(r"^[\s}]*[{.;,)<>\]:]", trailing_text) and not _IsType( - clean_lines, nesting_state, leading_text - ): - error(filename, linenum, "whitespace/braces", 5, "Missing space before {") - - # Make sure '} else {' has spaces. - if Search(r"}else", line): - error(filename, linenum, "whitespace/braces", 5, "Missing space before else") - - # You shouldn't have a space before a semicolon at the end of the line. - # There's a special case for "for" since the style guide allows space before - # the semicolon there. - if Search(r":\s*;\s*$", line): - error( - filename, - linenum, - "whitespace/semicolon", - 5, - "Semicolon defining empty statement. Use {} instead.", - ) - elif Search(r"^\s*;\s*$", line): - error( - filename, - linenum, - "whitespace/semicolon", - 5, - "Line contains only semicolon. If this should be an empty statement, " - "use {} instead.", - ) - elif Search(r"\s+;\s*$", line) and not Search(r"\bfor\b", line): - error( - filename, - linenum, - "whitespace/semicolon", - 5, - "Extra space before last semicolon. If this should be an empty " - "statement, use {} instead.", - ) - - -def IsDecltype(clean_lines, linenum, column): - """Check if the token ending on (linenum, column) is decltype(). - - Args: - clean_lines: A CleansedLines instance containing the file. - linenum: the number of the line to check. - column: end column of the token to check. - Returns: - True if this token is decltype() expression, False otherwise. 
- """ - (text, _, start_col) = ReverseCloseExpression(clean_lines, linenum, column) - if start_col < 0: - return False - if Search(r"\bdecltype\s*$", text[0:start_col]): - return True - return False - - -def CheckSectionSpacing(filename, clean_lines, class_info, linenum, error): - """Checks for additional blank line issues related to sections. - - Currently the only thing checked here is blank line before protected/private. - - Args: - filename: The name of the current file. - clean_lines: A CleansedLines instance containing the file. - class_info: A _ClassInfo objects. - linenum: The number of the line to check. - error: The function to call with any errors found. - """ - # Skip checks if the class is small, where small means 25 lines or less. - # 25 lines seems like a good cutoff since that's the usual height of - # terminals, and any class that can't fit in one screen can't really - # be considered "small". - # - # Also skip checks if we are on the first line. This accounts for - # classes that look like - # class Foo { public: ... }; - # - # If we didn't find the end of the class, last_line would be zero, - # and the check will be skipped by the first condition. - if ( - class_info.last_line - class_info.starting_linenum <= 24 - or linenum <= class_info.starting_linenum - ): - return - - matched = Match(r"\s*(public|protected|private):", clean_lines.lines[linenum]) - if matched: - # Issue warning if the line before public/protected/private was - # not a blank line, but don't do this if the previous line contains - # "class" or "struct". This can happen two ways: - # - We are at the beginning of the class. - # - We are forward-declaring an inner class that is semantically - # private, but needed to be public for implementation reasons. - # Also ignores cases where the previous line ends with a backslash as can be - # common when defining classes in C macros. 
- prev_line = clean_lines.lines[linenum - 1] - if ( - not IsBlankLine(prev_line) - and not Search(r"\b(class|struct)\b", prev_line) - and not Search(r"\\$", prev_line) - ): - # Try a bit harder to find the beginning of the class. This is to - # account for multi-line base-specifier lists, e.g.: - # class Derived - # : public Base { - end_class_head = class_info.starting_linenum - for i in range(class_info.starting_linenum, linenum): - if Search(r"\{\s*$", clean_lines.lines[i]): - end_class_head = i - break - if end_class_head < linenum - 1: - error( - filename, - linenum, - "whitespace/blank_line", - 3, - '"%s:" should be preceded by a blank line' % matched.group(1), - ) - - -def GetPreviousNonBlankLine(clean_lines, linenum): - """Return the most recent non-blank line and its line number. - - Args: - clean_lines: A CleansedLines instance containing the file contents. - linenum: The number of the line to check. - - Returns: - A tuple with two elements. The first element is the contents of the last - non-blank line before the current line, or the empty string if this is the - first non-blank line. The second is the line number of that line, or -1 - if this is the first non-blank line. - """ - - prevlinenum = linenum - 1 - while prevlinenum >= 0: - prevline = clean_lines.elided[prevlinenum] - if not IsBlankLine(prevline): # if not a blank line... - return (prevline, prevlinenum) - prevlinenum -= 1 - return ("", -1) + prevlinenum = linenum - 1 + while prevlinenum >= 0: + prevline = clean_lines.elided[prevlinenum] + if not IsBlankLine(prevline): # if not a blank line... + return (prevline, prevlinenum) + prevlinenum -= 1 + return ('', -1) def CheckBraces(filename, clean_lines, linenum, error): - """Looks for misplaced braces (e.g. at the end of line). - - Args: - filename: The name of the current file. - clean_lines: A CleansedLines instance containing the file. - linenum: The number of the line to check. - error: The function to call with any errors found. 
- """ - - line = clean_lines.elided[linenum] # get rid of comments and strings - - if Match(r"\s*{\s*$", line): - # We allow an open brace to start a line in the case where someone is using - # braces in a block to explicitly create a new scope, which is commonly used - # to control the lifetime of stack-allocated variables. Braces are also - # used for brace initializers inside function calls. We don't detect this - # perfectly: we just don't complain if the last non-whitespace character on - # the previous non-blank line is ',', ';', ':', '(', '{', or '}', or if the - # previous line starts a preprocessor block. We also allow a brace on the - # following line if it is part of an array initialization and would not fit - # within the 80 character limit of the preceding line. - prevline = GetPreviousNonBlankLine(clean_lines, linenum)[0] - if ( - not Search(r"[,;:}{(]\s*$", prevline) - and not Match(r"\s*#", prevline) - and not (GetLineWidth(prevline) > _line_length - 2 and "[]" in prevline) - ): - error( - filename, - linenum, - "whitespace/braces", - 4, - "{ should almost always be at the end of the previous line", - ) - - # An else clause should be on the same line as the preceding closing brace. - if Match(r"\s*else\b\s*(?:if\b|\{|$)", line): - prevline = GetPreviousNonBlankLine(clean_lines, linenum)[0] - if Match(r"\s*}\s*$", prevline): - error( - filename, - linenum, - "whitespace/newline", - 4, - "An else should appear on the same line as the preceding }", - ) - - # If braces come on one side of an else, they should be on both. - # However, we have to worry about "else if" that spans multiple lines! 
- if Search(r"else if\s*\(", line): # could be multi-line if - brace_on_left = bool(Search(r"}\s*else if\s*\(", line)) - # find the ( after the if - pos = line.find("else if") - pos = line.find("(", pos) - if pos > 0: - (endline, _, endpos) = CloseExpression(clean_lines, linenum, pos) - brace_on_right = endline[endpos:].find("{") != -1 - if brace_on_left != brace_on_right: # must be brace after if - error( - filename, - linenum, - "readability/braces", - 5, - "If an else has a brace on one side, it should have it on both", - ) - elif Search(r"}\s*else[^{]*$", line) or Match(r"[^}]*else\s*{", line): - error( - filename, - linenum, - "readability/braces", - 5, - "If an else has a brace on one side, it should have it on both", - ) - - # Likewise, an else should never have the else clause on the same line - if Search(r"\belse [^\s{]", line) and not Search(r"\belse if\b", line): - error( - filename, - linenum, - "whitespace/newline", - 4, - "Else clause should never be on same line as else (use 2 lines)", - ) - - # In the same way, a do/while should never be on one line - if Match(r"\s*do [^\s{]", line): - error( - filename, - linenum, - "whitespace/newline", - 4, - "do/while clauses should not be on a single line", - ) - - # Check single-line if/else bodies. The style guide says 'curly braces are not - # required for single-line statements'. We additionally allow multi-line, - # single statements, but we reject anything with more than one semicolon in - # it. This means that the first semicolon after the if should be at the end of - # its line, and the line after that should have an indent level equal to or - # lower than the if. We also check for ambiguous if/else nesting without - # braces. 
- if_else_match = Search(r"\b(if\s*(|constexpr)\s*\(|else\b)", line) - if if_else_match and not Match(r"\s*#", line): - if_indent = GetIndentLevel(line) - endline, endlinenum, endpos = line, linenum, if_else_match.end() - if_match = Search(r"\bif\s*(|constexpr)\s*\(", line) - if if_match: - # This could be a multiline if condition, so find the end first. - pos = if_match.end() - 1 - (endline, endlinenum, endpos) = CloseExpression(clean_lines, linenum, pos) - # Check for an opening brace, either directly after the if or on the next - # line. If found, this isn't a single-statement conditional. - if not Match(r"\s*{", endline[endpos:]) and not ( - Match(r"\s*$", endline[endpos:]) - and endlinenum < (len(clean_lines.elided) - 1) - and Match(r"\s*{", clean_lines.elided[endlinenum + 1]) - ): - while ( - endlinenum < len(clean_lines.elided) - and ";" not in clean_lines.elided[endlinenum][endpos:] - ): - endlinenum += 1 - endpos = 0 - if endlinenum < len(clean_lines.elided): - endline = clean_lines.elided[endlinenum] - # We allow a mix of whitespace and closing braces (e.g. for one-liner - # methods) and a single \ after the semicolon (for macros) - endpos = endline.find(";") - if not Match(r";[\s}]*(\\?)$", endline[endpos:]): - # Semicolon isn't the last character, there's something trailing. - # Output a warning if the semicolon is not contained inside - # a lambda expression. - if not Match( - r"^[^{};]*\[[^\[\]]*\][^{}]*\{[^{}]*\}\s*\)*[;,]\s*$", endline - ): - error( - filename, - linenum, - "readability/braces", - 4, - "If/else bodies with multiple statements require braces", - ) - elif endlinenum < len(clean_lines.elided) - 1: - # Make sure the next line is dedented - next_line = clean_lines.elided[endlinenum + 1] - next_indent = GetIndentLevel(next_line) - # With ambiguous nested if statements, this will error out on the - # if that *doesn't* match the else, regardless of whether it's the - # inner one or outer one. 
- if ( - if_match - and Match(r"\s*else\b", next_line) - and next_indent != if_indent - ): - error( - filename, - linenum, - "readability/braces", - 4, - "Else clause should be indented at the same level as if. " - "Ambiguous nested if/else chains require braces.", - ) - elif next_indent > if_indent: - error( - filename, - linenum, - "readability/braces", - 4, - "If/else bodies with multiple statements require braces", - ) + """Looks for misplaced braces (e.g. at the end of line). + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + error: The function to call with any errors found. + """ + + line = clean_lines.elided[linenum] # get rid of comments and strings + + if re.match(r'\s*{\s*$', line): + # We allow an open brace to start a line in the case where someone is using + # braces in a block to explicitly create a new scope, which is commonly used + # to control the lifetime of stack-allocated variables. Braces are also + # used for brace initializers inside function calls. We don't detect this + # perfectly: we just don't complain if the last non-whitespace character on + # the previous non-blank line is ',', ';', ':', '(', '{', or '}', or if the + # previous line starts a preprocessor block. We also allow a brace on the + # following line if it is part of an array initialization and would not fit + # within the 80 character limit of the preceding line. + prevline = GetPreviousNonBlankLine(clean_lines, linenum)[0] + if (not re.search(r'[,;:}{(]\s*$', prevline) and + not re.match(r'\s*#', prevline) and + not (GetLineWidth(prevline) > _line_length - 2 and '[]' in prevline)): + error(filename, linenum, 'whitespace/braces', 4, + '{ should almost always be at the end of the previous line') + + # An else clause should be on the same line as the preceding closing brace. 
+ if last_wrong := re.match(r'\s*else\b\s*(?:if\b|\{|$)', line): + prevline = GetPreviousNonBlankLine(clean_lines, linenum)[0] + if re.match(r'\s*}\s*$', prevline): + error(filename, linenum, 'whitespace/newline', 4, + 'An else should appear on the same line as the preceding }') + else: + last_wrong = False + + # If braces come on one side of an else, they should be on both. + # However, we have to worry about "else if" that spans multiple lines! + if re.search(r'else if\s*\(', line): # could be multi-line if + brace_on_left = bool(re.search(r'}\s*else if\s*\(', line)) + # find the ( after the if + pos = line.find('else if') + pos = line.find('(', pos) + if pos > 0: + (endline, _, endpos) = CloseExpression(clean_lines, linenum, pos) + brace_on_right = endline[endpos:].find('{') != -1 + if brace_on_left != brace_on_right: # must be brace after if + error(filename, linenum, 'readability/braces', 5, + 'If an else has a brace on one side, it should have it on both') + # Prevent detection if statement has { and we detected an improper newline after } + elif re.search(r'}\s*else[^{]*$', line) or (re.match(r'[^}]*else\s*{', line) and not last_wrong): + error(filename, linenum, 'readability/braces', 5, + 'If an else has a brace on one side, it should have it on both') + + # No control clauses with braces should have its contents on the same line + # Exclude } which will be covered by empty-block detect + # Exclude ; which may be used by while in a do-while + if keyword := re.search( + r'\b(else if|if|while|for|switch)' # These have parens + r'\s*\(.*\)\s*(?:\[\[(?:un)?likely\]\]\s*)?{\s*[^\s\\};]', line): + error(filename, linenum, 'whitespace/newline', 5, + f'Controlled statements inside brackets of {keyword.group(1)} clause' + ' should be on a separate line') + elif keyword := re.search( + r'\b(else|do|try)' # These don't have parens + r'\s*(?:\[\[(?:un)?likely\]\]\s*)?{\s*[^\s\\}]', line): + error(filename, linenum, 'whitespace/newline', 5, + f'Controlled statements 
inside brackets of {keyword.group(1)} clause' + ' should be on a separate line') + + # TODO: Err on if...else and do...while statements without braces; + # style guide has changed since the below comment was written + + # Check single-line if/else bodies. The style guide says 'curly braces are not + # required for single-line statements'. We additionally allow multi-line, + # single statements, but we reject anything with more than one semicolon in + # it. This means that the first semicolon after the if should be at the end of + # its line, and the line after that should have an indent level equal to or + # lower than the if. We also check for ambiguous if/else nesting without + # braces. + if_else_match = re.search(r'\b(if\s*(|constexpr)\s*\(|else\b)', line) + if if_else_match and not re.match(r'\s*#', line): + if_indent = GetIndentLevel(line) + endline, endlinenum, endpos = line, linenum, if_else_match.end() + if_match = re.search(r'\bif\s*(|constexpr)\s*\(', line) + if if_match: + # This could be a multiline if condition, so find the end first. + pos = if_match.end() - 1 + (endline, endlinenum, endpos) = CloseExpression(clean_lines, linenum, pos) + # Check for an opening brace, either directly after the if or on the next + # line. If found, this isn't a single-statement conditional. + if (not re.match(r'\s*(?:\[\[(?:un)?likely\]\]\s*)?{', endline[endpos:]) + and not (re.match(r'\s*$', endline[endpos:]) + and endlinenum < (len(clean_lines.elided) - 1) + and re.match(r'\s*{', clean_lines.elided[endlinenum + 1]))): + while (endlinenum < len(clean_lines.elided) + and ';' not in clean_lines.elided[endlinenum][endpos:]): + endlinenum += 1 + endpos = 0 + if endlinenum < len(clean_lines.elided): + endline = clean_lines.elided[endlinenum] + # We allow a mix of whitespace and closing braces (e.g. 
for one-liner + # methods) and a single \ after the semicolon (for macros) + endpos = endline.find(';') + if not re.match(r';[\s}]*(\\?)$', endline[endpos:]): + # Semicolon isn't the last character, there's something trailing. + # Output a warning if the semicolon is not contained inside + # a lambda expression. + if not re.match(r'^[^{};]*\[[^\[\]]*\][^{}]*\{[^{}]*\}\s*\)*[;,]\s*$', + endline): + error(filename, linenum, 'readability/braces', 4, + 'If/else bodies with multiple statements require braces') + elif endlinenum < len(clean_lines.elided) - 1: + # Make sure the next line is dedented + next_line = clean_lines.elided[endlinenum + 1] + next_indent = GetIndentLevel(next_line) + # With ambiguous nested if statements, this will error out on the + # if that *doesn't* match the else, regardless of whether it's the + # inner one or outer one. + if (if_match and re.match(r'\s*else\b', next_line) + and next_indent != if_indent): + error(filename, linenum, 'readability/braces', 4, + 'Else clause should be indented at the same level as if. ' + 'Ambiguous nested if/else chains require braces.') + elif next_indent > if_indent: + error(filename, linenum, 'readability/braces', 4, + 'If/else bodies with multiple statements require braces') def CheckTrailingSemicolon(filename, clean_lines, linenum, error): - """Looks for redundant trailing semicolon. - - Args: - filename: The name of the current file. - clean_lines: A CleansedLines instance containing the file. - linenum: The number of the line to check. - error: The function to call with any errors found. - """ - - line = clean_lines.elided[linenum] - - # Block bodies should not be followed by a semicolon. Due to C++11 - # brace initialization, there are more places where semicolons are - # required than not, so we use a whitelist approach to check these - # rather than a blacklist. These are the places where "};" should - # be replaced by just "}": - # 1. 
Some flavor of block following closing parenthesis: - # for (;;) {}; - # while (...) {}; - # switch (...) {}; - # Function(...) {}; - # if (...) {}; - # if (...) else if (...) {}; - # - # 2. else block: - # if (...) else {}; + """Looks for redundant trailing semicolon. + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + error: The function to call with any errors found. + """ + + line = clean_lines.elided[linenum] + + # Block bodies should not be followed by a semicolon. Due to C++11 + # brace initialization, there are more places where semicolons are + # required than not, so we explicitly list the allowed rules rather + # than listing the disallowed ones. These are the places where "};" + # should be replaced by just "}": + # 1. Some flavor of block following closing parenthesis: + # for (;;) {}; + # while (...) {}; + # switch (...) {}; + # Function(...) {}; + # if (...) {}; + # if (...) else if (...) {}; + # + # 2. else block: + # if (...) else {}; + # + # 3. const member function: + # Function(...) const {}; + # + # 4. Block following some statement: + # x = 42; + # {}; + # + # 5. Block at the beginning of a function: + # Function(...) { + # {}; + # } + # + # Note that naively checking for the preceding "{" will also match + # braces inside multi-dimensional arrays, but this is fine since + # that expression will not contain semicolons. + # + # 6. Block following another block: + # while (true) {} + # {}; + # + # 7. End of namespaces: + # namespace {}; + # + # These semicolons seems far more common than other kinds of + # redundant semicolons, possibly due to people converting classes + # to namespaces. For now we do not warn for this case. + # + # Try matching case 1 first. + match = re.match(r'^(.*\)\s*)\{', line) + if match: + # Matched closing parenthesis (case 1). 
Check the token before the + # matching opening parenthesis, and don't warn if it looks like a + # macro. This avoids these false positives: + # - macro that defines a base class + # - multi-line macro that defines a base class + # - macro that defines the whole class-head # - # 3. const member function: - # Function(...) const {}; + # But we still issue warnings for macros that we know are safe to + # warn, specifically: + # - TEST, TEST_F, TEST_P, MATCHER, MATCHER_P + # - TYPED_TEST + # - INTERFACE_DEF + # - EXCLUSIVE_LOCKS_REQUIRED, SHARED_LOCKS_REQUIRED, LOCKS_EXCLUDED: # - # 4. Block following some statement: - # x = 42; - # {}; + # We implement a list of safe macros instead of a list of + # unsafe macros, even though the latter appears less frequently in + # google code and would have been easier to implement. This is because + # the downside for getting the allowed checks wrong means some extra + # semicolons, while the downside for getting disallowed checks wrong + # would result in compile errors. # - # 5. Block at the beginning of a function: - # Function(...) { - # {}; - # } - # - # Note that naively checking for the preceding "{" will also match - # braces inside multi-dimensional arrays, but this is fine since - # that expression will not contain semicolons. - # - # 6. Block following another block: - # while (true) {} - # {}; - # - # 7. End of namespaces: - # namespace {}; - # - # These semicolons seems far more common than other kinds of - # redundant semicolons, possibly due to people converting classes - # to namespaces. For now we do not warn for this case. - # - # Try matching case 1 first. - match = Match(r"^(.*\)\s*)\{", line) - if match: - # Matched closing parenthesis (case 1). Check the token before the - # matching opening parenthesis, and don't warn if it looks like a - # macro. 
This avoids these false positives: - # - macro that defines a base class - # - multi-line macro that defines a base class - # - macro that defines the whole class-head - # - # But we still issue warnings for macros that we know are safe to - # warn, specifically: - # - TEST, TEST_F, TEST_P, MATCHER, MATCHER_P - # - TYPED_TEST - # - INTERFACE_DEF - # - EXCLUSIVE_LOCKS_REQUIRED, SHARED_LOCKS_REQUIRED, LOCKS_EXCLUDED: - # - # We implement a whitelist of safe macros instead of a blacklist of - # unsafe macros, even though the latter appears less frequently in - # google code and would have been easier to implement. This is because - # the downside for getting the whitelist wrong means some extra - # semicolons, while the downside for getting the blacklist wrong - # would result in compile errors. - # - # In addition to macros, we also don't want to warn on - # - Compound literals - # - Lambdas - # - alignas specifier with anonymous structs - # - decltype - closing_brace_pos = match.group(1).rfind(")") - opening_parenthesis = ReverseCloseExpression( - clean_lines, linenum, closing_brace_pos - ) - if opening_parenthesis[2] > -1: - line_prefix = opening_parenthesis[0][0 : opening_parenthesis[2]] - macro = Search(r"\b([A-Z_][A-Z0-9_]*)\s*$", line_prefix) - func = Match(r"^(.*\])\s*$", line_prefix) - if ( - ( - macro - and macro.group(1) - not in ( - "TEST", - "TEST_F", - "MATCHER", - "MATCHER_P", - "TYPED_TEST", - "EXCLUSIVE_LOCKS_REQUIRED", - "SHARED_LOCKS_REQUIRED", - "LOCKS_EXCLUDED", - "INTERFACE_DEF", - ) - ) - or (func and not Search(r"\boperator\s*\[\s*\]", func.group(1))) - or Search(r"\b(?:struct|union)\s+alignas\s*$", line_prefix) - or Search(r"\bdecltype$", line_prefix) - or Search(r"\s+=\s*$", line_prefix) - ): - match = None - if ( - match - and opening_parenthesis[1] > 1 - and Search(r"\]\s*$", clean_lines.elided[opening_parenthesis[1] - 1]) - ): - # Multi-line lambda-expression - match = None - - else: - # Try matching cases 2-3. 
- match = Match(r"^(.*(?:else|\)\s*const)\s*)\{", line) - if not match: - # Try matching cases 4-6. These are always matched on separate lines. - # - # Note that we can't simply concatenate the previous line to the - # current line and do a single match, otherwise we may output - # duplicate warnings for the blank line case: - # if (cond) { - # // blank line - # } - prevline = GetPreviousNonBlankLine(clean_lines, linenum)[0] - if prevline and Search(r"[;{}]\s*$", prevline): - match = Match(r"^(\s*)\{", line) - - # Check matching closing brace - if match: - (endline, endlinenum, endpos) = CloseExpression( - clean_lines, linenum, len(match.group(1)) - ) - if endpos > -1 and Match(r"^\s*;", endline[endpos:]): - # Current {} pair is eligible for semicolon check, and we have found - # the redundant semicolon, output warning here. - # - # Note: because we are scanning forward for opening braces, and - # outputting warnings for the matching closing brace, if there are - # nested blocks with trailing semicolons, we will get the error - # messages in reversed order. 
- - # We need to check the line forward for NOLINT - raw_lines = clean_lines.raw_lines - ParseNolintSuppressions( - filename, raw_lines[endlinenum - 1], endlinenum - 1, error - ) - ParseNolintSuppressions(filename, raw_lines[endlinenum], endlinenum, error) - - error( - filename, - endlinenum, - "readability/braces", - 4, - "You don't need a ; after a }", - ) + # In addition to macros, we also don't want to warn on + # - Compound literals + # - Lambdas + # - alignas specifier with anonymous structs + # - decltype + closing_brace_pos = match.group(1).rfind(')') + opening_parenthesis = ReverseCloseExpression( + clean_lines, linenum, closing_brace_pos) + if opening_parenthesis[2] > -1: + line_prefix = opening_parenthesis[0][0:opening_parenthesis[2]] + macro = re.search(r'\b([A-Z_][A-Z0-9_]*)\s*$', line_prefix) + func = re.match(r'^(.*\])\s*$', line_prefix) + if ((macro and + macro.group(1) not in ( + 'TEST', 'TEST_F', 'MATCHER', 'MATCHER_P', 'TYPED_TEST', + 'EXCLUSIVE_LOCKS_REQUIRED', 'SHARED_LOCKS_REQUIRED', + 'LOCKS_EXCLUDED', 'INTERFACE_DEF')) or + (func and not re.search(r'\boperator\s*\[\s*\]', func.group(1))) or + re.search(r'\b(?:struct|union)\s+alignas\s*$', line_prefix) or + re.search(r'\bdecltype$', line_prefix) or + re.search(r'\s+=\s*$', line_prefix)): + match = None + if (match and + opening_parenthesis[1] > 1 and + re.search(r'\]\s*$', clean_lines.elided[opening_parenthesis[1] - 1])): + # Multi-line lambda-expression + match = None + + else: + # Try matching cases 2-3. + match = re.match(r'^(.*(?:else|\)\s*const)\s*)\{', line) + if not match: + # Try matching cases 4-6. These are always matched on separate lines. 
+ # + # Note that we can't simply concatenate the previous line to the + # current line and do a single match, otherwise we may output + # duplicate warnings for the blank line case: + # if (cond) { + # // blank line + # } + prevline = GetPreviousNonBlankLine(clean_lines, linenum)[0] + if prevline and re.search(r'[;{}]\s*$', prevline): + match = re.match(r'^(\s*)\{', line) + + # Check matching closing brace + if match: + (endline, endlinenum, endpos) = CloseExpression( + clean_lines, linenum, len(match.group(1))) + if endpos > -1 and re.match(r'^\s*;', endline[endpos:]): + # Current {} pair is eligible for semicolon check, and we have found + # the redundant semicolon, output warning here. + # + # Note: because we are scanning forward for opening braces, and + # outputting warnings for the matching closing brace, if there are + # nested blocks with trailing semicolons, we will get the error + # messages in reversed order. + + # We need to check the line forward for NOLINT + raw_lines = clean_lines.raw_lines + ParseNolintSuppressions(filename, raw_lines[endlinenum-1], endlinenum-1, + error) + ParseNolintSuppressions(filename, raw_lines[endlinenum], endlinenum, + error) + + error(filename, endlinenum, 'readability/braces', 4, + "You don't need a ; after a }") def CheckEmptyBlockBody(filename, clean_lines, linenum, error): - """Look for empty loop/conditional body with only a single semicolon. - - Args: - filename: The name of the current file. - clean_lines: A CleansedLines instance containing the file. - linenum: The number of the line to check. - error: The function to call with any errors found. - """ - - # Search for loop keywords at the beginning of the line. Because only - # whitespaces are allowed before the keywords, this will also ignore most - # do-while-loops, since those lines should start with closing brace. - # - # We also check "if" blocks here, since an empty conditional block - # is likely an error. 
- line = clean_lines.elided[linenum] - matched = Match(r"\s*(for|while|if)\s*\(", line) - if matched: - # Find the end of the conditional expression. - (end_line, end_linenum, end_pos) = CloseExpression( - clean_lines, linenum, line.find("(") - ) - - # Output warning if what follows the condition expression is a semicolon. - # No warning for all other cases, including whitespace or newline, since we - # have a separate check for semicolons preceded by whitespace. - if end_pos >= 0 and Match(r";", end_line[end_pos:]): - if matched.group(1) == "if": - error( - filename, - end_linenum, - "whitespace/empty_conditional_body", - 5, - "Empty conditional bodies should use {}", - ) - else: - error( - filename, - end_linenum, - "whitespace/empty_loop_body", - 5, - "Empty loop bodies should use {} or continue", - ) - - # Check for if statements that have completely empty bodies (no comments) - # and no else clauses. - if end_pos >= 0 and matched.group(1) == "if": - # Find the position of the opening { for the if statement. - # Return without logging an error if it has no brackets. - opening_linenum = end_linenum - opening_line_fragment = end_line[end_pos:] - # Loop until EOF or find anything that's not whitespace or opening {. - while not Search(r"^\s*\{", opening_line_fragment): - if Search(r"^(?!\s*$)", opening_line_fragment): - # Conditional has no brackets. - return - opening_linenum += 1 - if opening_linenum == len(clean_lines.elided): - # Couldn't find conditional's opening { or any code before EOF. - return - opening_line_fragment = clean_lines.elided[opening_linenum] - # Set opening_line (opening_line_fragment may not be entire opening line). - opening_line = clean_lines.elided[opening_linenum] - - # Find the position of the closing }. - opening_pos = opening_line_fragment.find("{") - if opening_linenum == end_linenum: - # We need to make opening_pos relative to the start of the entire line. 
- opening_pos += end_pos - (closing_line, closing_linenum, closing_pos) = CloseExpression( - clean_lines, opening_linenum, opening_pos - ) - if closing_pos < 0: - return - - # Now construct the body of the conditional. This consists of the portion - # of the opening line after the {, all lines until the closing line, - # and the portion of the closing line before the }. - if clean_lines.raw_lines[opening_linenum] != CleanseComments( - clean_lines.raw_lines[opening_linenum] - ): - # Opening line ends with a comment, so conditional isn't empty. - return - if closing_linenum > opening_linenum: - # Opening line after the {. Ignore comments here since we checked above. - bodylist = list(opening_line[opening_pos + 1 :]) - # All lines until closing line, excluding closing line, with comments. - bodylist.extend( - clean_lines.raw_lines[opening_linenum + 1 : closing_linenum] - ) - # Closing line before the }. Won't (and can't) have comments. - bodylist.append(clean_lines.elided[closing_linenum][: closing_pos - 1]) - body = "\n".join(bodylist) - else: - # If statement has brackets and fits on a single line. - body = opening_line[opening_pos + 1 : closing_pos - 1] - - # Check if the body is empty - if not _EMPTY_CONDITIONAL_BODY_PATTERN.search(body): - return - # The body is empty. Now make sure there's not an else clause. - current_linenum = closing_linenum - current_line_fragment = closing_line[closing_pos:] - # Loop until EOF or find anything that's not whitespace or else clause. - while Search(r"^\s*$|^(?=\s*else)", current_line_fragment): - if Search(r"^(?=\s*else)", current_line_fragment): - # Found an else clause, so don't log an error. - return - current_linenum += 1 - if current_linenum == len(clean_lines.elided): - break - current_line_fragment = clean_lines.elided[current_linenum] - - # The body is empty and there's no else clause until EOF or other code. 
- error( - filename, - end_linenum, - "whitespace/empty_if_body", - 4, - ("If statement had no body and no else clause"), - ) - - -def FindCheckMacro(line): - """Find a replaceable CHECK-like macro. - - Args: - line: line to search on. - Returns: - (macro name, start position), or (None, -1) if no replaceable - macro is found. - """ - for macro in _CHECK_MACROS: - i = line.find(macro) - if i >= 0: - # Find opening parenthesis. Do a regular expression match here - # to make sure that we are matching the expected CHECK macro, as - # opposed to some other macro that happens to contain the CHECK - # substring. - matched = Match(r"^(.*\b" + macro + r"\s*)\(", line) - if not matched: - continue - return (macro, len(matched.group(1))) - return (None, -1) - - -def CheckCheck(filename, clean_lines, linenum, error): - """Checks the use of CHECK and EXPECT macros. - - Args: - filename: The name of the current file. - clean_lines: A CleansedLines instance containing the file. - linenum: The number of the line to check. - error: The function to call with any errors found. - """ - - # Decide the set of replacement macros that should be suggested - lines = clean_lines.elided - (check_macro, start_pos) = FindCheckMacro(lines[linenum]) - if not check_macro: - return - - # Find end of the boolean expression by matching parentheses - (last_line, end_line, end_pos) = CloseExpression(clean_lines, linenum, start_pos) - if end_pos < 0: - return - - # If the check macro is followed by something other than a - # semicolon, assume users will log their own custom error messages - # and don't suggest any replacements. - if not Match(r"\s*;", last_line[end_pos:]): + """Look for empty loop/conditional body with only a single semicolon. + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + error: The function to call with any errors found. 
+ """ + + # Search for loop keywords at the beginning of the line. Because only + # whitespaces are allowed before the keywords, this will also ignore most + # do-while-loops, since those lines should start with closing brace. + # + # We also check "if" blocks here, since an empty conditional block + # is likely an error. + line = clean_lines.elided[linenum] + matched = re.match(r'\s*(for|while|if)\s*\(', line) + if matched: + # Find the end of the conditional expression. + (end_line, end_linenum, end_pos) = CloseExpression( + clean_lines, linenum, line.find('(')) + + # Output warning if what follows the condition expression is a semicolon. + # No warning for all other cases, including whitespace or newline, since we + # have a separate check for semicolons preceded by whitespace. + if end_pos >= 0 and re.match(r';', end_line[end_pos:]): + if matched.group(1) == 'if': + error(filename, end_linenum, 'whitespace/empty_conditional_body', 5, + 'Empty conditional bodies should use {}') + else: + error(filename, end_linenum, 'whitespace/empty_loop_body', 5, + 'Empty loop bodies should use {} or continue') + + # Check for if statements that have completely empty bodies (no comments) + # and no else clauses. + if end_pos >= 0 and matched.group(1) == 'if': + # Find the position of the opening { for the if statement. + # Return without logging an error if it has no brackets. + opening_linenum = end_linenum + opening_line_fragment = end_line[end_pos:] + # Loop until EOF or find anything that's not whitespace or opening {. + while not re.search(r'^\s*\{', opening_line_fragment): + if re.search(r'^(?!\s*$)', opening_line_fragment): + # Conditional has no brackets. + return + opening_linenum += 1 + if opening_linenum == len(clean_lines.elided): + # Couldn't find conditional's opening { or any code before EOF. + return + opening_line_fragment = clean_lines.elided[opening_linenum] + # Set opening_line (opening_line_fragment may not be entire opening line). 
+ opening_line = clean_lines.elided[opening_linenum] + + # Find the position of the closing }. + opening_pos = opening_line_fragment.find('{') + if opening_linenum == end_linenum: + # We need to make opening_pos relative to the start of the entire line. + opening_pos += end_pos + (closing_line, closing_linenum, closing_pos) = CloseExpression( + clean_lines, opening_linenum, opening_pos) + if closing_pos < 0: return - if linenum == end_line: - expression = lines[linenum][start_pos + 1 : end_pos - 1] - else: - expression = lines[linenum][start_pos + 1 :] - for i in xrange(linenum + 1, end_line): - expression += lines[i] - expression += last_line[0 : end_pos - 1] - - # Parse expression so that we can take parentheses into account. - # This avoids false positives for inputs like "CHECK((a < 4) == b)", - # which is not replaceable by CHECK_LE. - lhs = "" - rhs = "" - operator = None - while expression: - matched = Match( - r"^\s*(<<|<<=|>>|>>=|->\*|->|&&|\|\||" r"==|!=|>=|>|<=|<|\()(.*)$", - expression, - ) - if matched: - token = matched.group(1) - if token == "(": - # Parenthesized operand - expression = matched.group(2) - (end, _) = FindEndOfExpressionInLine(expression, 0, ["("]) - if end < 0: - return # Unmatched parenthesis - lhs += "(" + expression[0:end] - expression = expression[end:] - elif token in ("&&", "||"): - # Logical and/or operators. This means the expression - # contains more than one term, for example: - # CHECK(42 < a && a < b); - # - # These are not replaceable with CHECK_LE, so bail out early. - return - elif token in ("<<", "<<=", ">>", ">>=", "->*", "->"): - # Non-relational operator - lhs += token - expression = matched.group(2) - else: - # Relational operator - operator = token - rhs = matched.group(2) - break - else: - # Unparenthesized operand. Instead of appending to lhs one character - # at a time, we do another regular expression match to consume several - # characters at once if possible. 
Trivial benchmark shows that this - # is more efficient when the operands are longer than a single - # character, which is generally the case. - matched = Match(r"^([^-=!<>()&|]+)(.*)$", expression) - if not matched: - matched = Match(r"^(\s*\S)(.*)$", expression) - if not matched: - break - lhs += matched.group(1) - expression = matched.group(2) - - # Only apply checks if we got all parts of the boolean expression - if not (lhs and operator and rhs): + # Now construct the body of the conditional. This consists of the portion + # of the opening line after the {, all lines until the closing line, + # and the portion of the closing line before the }. + if (clean_lines.raw_lines[opening_linenum] != + CleanseComments(clean_lines.raw_lines[opening_linenum])): + # Opening line ends with a comment, so conditional isn't empty. return - - # Check that rhs do not contain logical operators. We already know - # that lhs is fine since the loop above parses out && and ||. - if rhs.find("&&") > -1 or rhs.find("||") > -1: + if closing_linenum > opening_linenum: + # Opening line after the {. Ignore comments here since we checked above. + bodylist = list(opening_line[opening_pos+1:]) + # All lines until closing line, excluding closing line, with comments. + bodylist.extend(clean_lines.raw_lines[opening_linenum+1:closing_linenum]) + # Closing line before the }. Won't (and can't) have comments. + bodylist.append(clean_lines.elided[closing_linenum][:closing_pos-1]) + body = '\n'.join(bodylist) + else: + # If statement has brackets and fits on a single line. + body = opening_line[opening_pos+1:closing_pos-1] + + # Check if the body is empty + if not _EMPTY_CONDITIONAL_BODY_PATTERN.search(body): return - - # At least one of the operands must be a constant literal. This is - # to avoid suggesting replacements for unprintable things like - # CHECK(variable != iterator) - # - # The following pattern matches decimal, hex integers, strings, and - # characters (in that order). 
- lhs = lhs.strip() - rhs = rhs.strip() - match_constant = r'^([-+]?(\d+|0[xX][0-9a-fA-F]+)[lLuU]{0,3}|".*"|\'.*\')$' - if Match(match_constant, lhs) or Match(match_constant, rhs): - # Note: since we know both lhs and rhs, we can provide a more - # descriptive error message like: - # Consider using CHECK_EQ(x, 42) instead of CHECK(x == 42) - # Instead of: - # Consider using CHECK_EQ instead of CHECK(a == b) - # - # We are still keeping the less descriptive message because if lhs - # or rhs gets long, the error message might become unreadable. - error( - filename, - linenum, - "readability/check", - 2, - "Consider using %s instead of %s(a %s b)" - % (_CHECK_REPLACEMENT[check_macro][operator], check_macro, operator), - ) - - -def CheckAltTokens(filename, clean_lines, linenum, error): - """Check alternative keywords being used in boolean expressions. - - Args: - filename: The name of the current file. - clean_lines: A CleansedLines instance containing the file. - linenum: The number of the line to check. - error: The function to call with any errors found. - """ - line = clean_lines.elided[linenum] - - # Avoid preprocessor lines - if Match(r"^\s*#", line): - return - - # Last ditch effort to avoid multi-line comments. This will not help - # if the comment started before the current line or ended after the - # current line, but it catches most of the false positives. At least, - # it provides a way to workaround this warning for people who use - # multi-line comments in preprocessor macros. - # - # TODO(unknown): remove this once cpplint has better support for - # multi-line comments. - if line.find("/*") >= 0 or line.find("*/") >= 0: - return - - for match in _ALT_TOKEN_REPLACEMENT_PATTERN.finditer(line): - error( - filename, - linenum, - "readability/alt_tokens", - 2, - "Use operator %s instead of %s" - % (_ALT_TOKEN_REPLACEMENT[match.group(1)], match.group(1)), - ) - - -def GetLineWidth(line): - """Determines the width of the line in column positions. 
- - Args: - line: A string, which may be a Unicode string. - - Returns: - The width of the line in column positions, accounting for Unicode - combining characters and wide characters. - """ - if isinstance(line, unicode): - width = 0 - for uc in unicodedata.normalize("NFC", line): - if unicodedata.east_asian_width(uc) in ("W", "F"): - width += 2 - elif not unicodedata.combining(uc): - # Issue 337 - # https://mail.python.org/pipermail/python-list/2012-August/628809.html - if (sys.version_info.major, sys.version_info.minor) <= (3, 2): - # https://github.com/python/cpython/blob/2.7/Include/unicodeobject.h#L81 - is_wide_build = sysconfig.get_config_var("Py_UNICODE_SIZE") >= 4 - # https://github.com/python/cpython/blob/2.7/Objects/unicodeobject.c#L564 - is_low_surrogate = 0xDC00 <= ord(uc) <= 0xDFFF - if not is_wide_build and is_low_surrogate: - width -= 1 - - width += 1 - return width - else: - return len(line) - - -def CheckStyle(filename, clean_lines, linenum, file_extension, nesting_state, error): - """Checks rules from the 'C++ style rules' section of cppguide.html. - - Most of these rules are hard to test (naming, comment style), but we - do what we can. In particular we check for 2-space indents, line lengths, - tab usage, spaces inside code, etc. - - Args: - filename: The name of the current file. - clean_lines: A CleansedLines instance containing the file. - linenum: The number of the line to check. - file_extension: The extension (without the dot) of the filename. - nesting_state: A NestingState instance which maintains information about - the current stack of nested blocks being parsed. - error: The function to call with any errors found. - """ - - # Don't use "elided" lines here, otherwise we can't check commented lines. 
- # Don't want to use "raw" either, because we don't want to check inside C++11 - # raw strings, - raw_lines = clean_lines.lines_without_raw_strings - line = raw_lines[linenum] - prev = raw_lines[linenum - 1] if linenum > 0 else "" - - if line.find("\t") != -1: - error(filename, linenum, "whitespace/tab", 1, "Tab found; better to use spaces") - - # One or three blank spaces at the beginning of the line is weird; it's - # hard to reconcile that with 2-space indents. - # NOTE: here are the conditions rob pike used for his tests. Mine aren't - # as sophisticated, but it may be worth becoming so: RLENGTH==initial_spaces - # if(RLENGTH > 20) complain = 0; - # if(match($0, " +(error|private|public|protected):")) complain = 0; - # if(match(prev, "&& *$")) complain = 0; - # if(match(prev, "\\|\\| *$")) complain = 0; - # if(match(prev, "[\",=><] *$")) complain = 0; - # if(match($0, " <<")) complain = 0; - # if(match(prev, " +for \\(")) complain = 0; - # if(prevodd && match(prevprev, " +for \\(")) complain = 0; - scope_or_label_pattern = r"\s*\w+\s*:\s*\\?$" - classinfo = nesting_state.InnermostClass() - initial_spaces = 0 - cleansed_line = clean_lines.elided[linenum] - while initial_spaces < len(line) and line[initial_spaces] == " ": - initial_spaces += 1 - # There are certain situations we allow one space, notably for - # section labels, and also lines containing multi-line raw strings. - # We also don't check for lines that look like continuation lines - # (of lines ending in double quotes, commas, equals, or angle brackets) - # because the rules for how to indent those are non-trivial. - if ( - not Search(r'[",=><] *$', prev) - and (initial_spaces == 1 or initial_spaces == 3) - and not Match(scope_or_label_pattern, cleansed_line) - and not (clean_lines.raw_lines[linenum] != line and Match(r'^\s*""', line)) - ): - error( - filename, - linenum, - "whitespace/indent", - 3, - "Weird number of spaces at line-start. 
" "Are you using a 2-space indent?", - ) - - if line and line[-1].isspace(): - error( - filename, - linenum, - "whitespace/end_of_line", - 4, - "Line ends in whitespace. Consider deleting these extra spaces.", - ) - - # Check if the line is a header guard. - is_header_guard = False - if IsHeaderExtension(file_extension): - cppvar = GetHeaderGuardCPPVariable(filename) - if ( - line.startswith("#ifndef %s" % cppvar) - or line.startswith("#define %s" % cppvar) - or line.startswith("#endif // %s" % cppvar) - ): - is_header_guard = True - # #include lines and header guards can be long, since there's no clean way to - # split them. - # - # URLs can be long too. It's possible to split these, but it makes them - # harder to cut&paste. - # - # The "$Id:...$" comment may also get very long without it being the - # developers fault. - # - # Doxygen documentation copying can get pretty long when using an overloaded - # function declaration - if ( - not line.startswith("#include") - and not is_header_guard - and not Match(r"^\s*//.*http(s?)://\S*$", line) - and not Match(r"^\s*//\s*[^\s]*$", line) - and not Match(r"^// \$Id:.*#[0-9]+ \$$", line) - and not Match(r"^\s*/// [@\\](copydoc|copydetails|copybrief) .*$", line) - ): - line_width = GetLineWidth(line) - if line_width > _line_length: - error( - filename, - linenum, - "whitespace/line_length", - 2, - "Lines should be <= %i characters long" % _line_length, - ) - - if ( - cleansed_line.count(";") > 1 - and - # allow simple single line lambdas - not Match(r"^[^{};]*\[[^\[\]]*\][^{}]*\{[^{}\n\r]*\}", line) - and - # for loops are allowed two ;'s (and may run over two lines). 
- cleansed_line.find("for") == -1 - and ( - GetPreviousNonBlankLine(clean_lines, linenum)[0].find("for") == -1 - or GetPreviousNonBlankLine(clean_lines, linenum)[0].find(";") != -1 - ) - and - # It's ok to have many commands in a switch case that fits in 1 line - not ( - (cleansed_line.find("case ") != -1 or cleansed_line.find("default:") != -1) - and cleansed_line.find("break;") != -1 - ) - ): - error( - filename, - linenum, - "whitespace/newline", - 0, - "More than one command on the same line", - ) - - # Some more style checks - CheckBraces(filename, clean_lines, linenum, error) - CheckTrailingSemicolon(filename, clean_lines, linenum, error) - CheckEmptyBlockBody(filename, clean_lines, linenum, error) - CheckSpacing(filename, clean_lines, linenum, nesting_state, error) - CheckOperatorSpacing(filename, clean_lines, linenum, error) - CheckParenthesisSpacing(filename, clean_lines, linenum, error) - CheckCommaSpacing(filename, clean_lines, linenum, error) - CheckBracesSpacing(filename, clean_lines, linenum, nesting_state, error) - CheckSpacingForFunctionCall(filename, clean_lines, linenum, error) - CheckCheck(filename, clean_lines, linenum, error) - CheckAltTokens(filename, clean_lines, linenum, error) - classinfo = nesting_state.InnermostClass() - if classinfo: - CheckSectionSpacing(filename, clean_lines, classinfo, linenum, error) + # The body is empty. Now make sure there's not an else clause. + current_linenum = closing_linenum + current_line_fragment = closing_line[closing_pos:] + # Loop until EOF or find anything that's not whitespace or else clause. + while re.search(r'^\s*$|^(?=\s*else)', current_line_fragment): + if re.search(r'^(?=\s*else)', current_line_fragment): + # Found an else clause, so don't log an error. + return + current_linenum += 1 + if current_linenum == len(clean_lines.elided): + break + current_line_fragment = clean_lines.elided[current_linenum] + + # The body is empty and there's no else clause until EOF or other code. 
+ error(filename, end_linenum, 'whitespace/empty_if_body', 4, + ('If statement had no body and no else clause')) -_RE_PATTERN_INCLUDE = re.compile(r'^\s*#\s*include\s*([<"])([^>"]*)[>"].*$') -# Matches the first component of a filename delimited by -s and _s. That is: -# _RE_FIRST_COMPONENT.match('foo').group(0) == 'foo' -# _RE_FIRST_COMPONENT.match('foo.cc').group(0) == 'foo' -# _RE_FIRST_COMPONENT.match('foo-bar_baz.cc').group(0) == 'foo' -# _RE_FIRST_COMPONENT.match('foo_bar-baz.cc').group(0) == 'foo' -_RE_FIRST_COMPONENT = re.compile(r"^[^-_.]+") - - -def _DropCommonSuffixes(filename): - """Drops common suffixes like _test.cc or -inl.h from filename. - - For example: - >>> _DropCommonSuffixes('foo/foo-inl.h') - 'foo/foo' - >>> _DropCommonSuffixes('foo/bar/foo.cc') - 'foo/bar/foo' - >>> _DropCommonSuffixes('foo/foo_internal.h') - 'foo/foo' - >>> _DropCommonSuffixes('foo/foo_unusualinternal.h') - 'foo/foo_unusualinternal' - - Args: - filename: The input filename. - - Returns: - The filename with the common suffix removed. - """ - for suffix in itertools.chain( - ( - "%s.%s" % (test_suffix.lstrip("_"), ext) - for test_suffix, ext in itertools.product( - _test_suffixes, GetNonHeaderExtensions() - ) - ), - ( - "%s.%s" % (suffix, ext) - for suffix, ext in itertools.product( - ["inl", "imp", "internal"], GetHeaderExtensions() - ) - ), - ): - if ( - filename.endswith(suffix) - and len(filename) > len(suffix) - and filename[-len(suffix) - 1] in ("-", "_") - ): - return filename[: -len(suffix) - 1] - return os.path.splitext(filename)[0] - - -def _ClassifyInclude(fileinfo, include, is_system): - """Figures out what kind of header 'include' is. - - Args: - fileinfo: The current file cpplint is running over. A FileInfo instance. - include: The path to a #included file. - is_system: True if the #include used <> rather than "". - - Returns: - One of the _XXX_HEADER constants. 
- - For example: - >>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'stdio.h', True) - _C_SYS_HEADER - >>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'string', True) - _CPP_SYS_HEADER - >>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'foo/foo.h', False) - _LIKELY_MY_HEADER - >>> _ClassifyInclude(FileInfo('foo/foo_unknown_extension.cc'), - ... 'bar/foo_other_ext.h', False) - _POSSIBLE_MY_HEADER - >>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'foo/bar.h', False) - _OTHER_HEADER - """ - # This is a list of all standard c++ header files, except - # those already checked for above. - is_cpp_h = include in _CPP_HEADERS - - # Headers with C++ extensions shouldn't be considered C system headers - if is_system and os.path.splitext(include)[1] in [".hpp", ".hxx", ".h++"]: - is_system = False - - if is_system: - if is_cpp_h: - return _CPP_SYS_HEADER - else: - return _C_SYS_HEADER - - # If the target file and the include we're checking share a - # basename when we drop common extensions, and the include - # lives in . , then it's likely to be owned by the target file. - target_dir, target_base = os.path.split( - _DropCommonSuffixes(fileinfo.RepositoryName()) - ) - include_dir, include_base = os.path.split(_DropCommonSuffixes(include)) - target_dir_pub = os.path.normpath(target_dir + "/../public") - target_dir_pub = target_dir_pub.replace("\\", "/") - if target_base == include_base and ( - include_dir == target_dir or include_dir == target_dir_pub - ): - return _LIKELY_MY_HEADER - - # If the target and include share some initial basename - # component, it's possible the target is implementing the - # include, so it's allowed to be first, but we'll never - # complain if it's not there. 
- target_first_component = _RE_FIRST_COMPONENT.match(target_base) - include_first_component = _RE_FIRST_COMPONENT.match(include_base) - if ( - target_first_component - and include_first_component - and target_first_component.group(0) == include_first_component.group(0) - ): - return _POSSIBLE_MY_HEADER - - return _OTHER_HEADER - - -def CheckIncludeLine(filename, clean_lines, linenum, include_state, error): - """Check rules that are applicable to #include lines. - - Strings on #include lines are NOT removed from elided line, to make - certain tasks easier. However, to prevent false positives, checks - applicable to #include lines in CheckLanguage must be put here. - - Args: - filename: The name of the current file. - clean_lines: A CleansedLines instance containing the file. - linenum: The number of the line to check. - include_state: An _IncludeState instance in which the headers are inserted. - error: The function to call with any errors found. - """ - fileinfo = FileInfo(filename) - line = clean_lines.lines[linenum] - - # "include" should use the new style "foo/bar.h" instead of just "bar.h" - # Only do this check if the included header follows google naming - # conventions. If not, assume that it's a 3rd party API that - # requires special include conventions. - # - # We also make an exception for Lua headers, which follow google - # naming convention but not the include convention. - match = Match(r'#include\s*"([^/]+\.h)"', line) - if match and not _THIRD_PARTY_HEADERS_PATTERN.match(match.group(1)): - error( - filename, - linenum, - "build/include_subdir", - 4, - "Include the directory when naming .h files", - ) - - # we shouldn't include a file more than once. actually, there are a - # handful of instances where doing so is okay, but in general it's - # not. 
- match = _RE_PATTERN_INCLUDE.search(line) - if match: - include = match.group(2) - is_system = match.group(1) == "<" - duplicate_line = include_state.FindHeader(include) - if duplicate_line >= 0: - error( - filename, - linenum, - "build/include", - 4, - '"%s" already included at %s:%s' % (include, filename, duplicate_line), - ) - return - - for extension in GetNonHeaderExtensions(): - if include.endswith("." + extension) and os.path.dirname( - fileinfo.RepositoryName() - ) != os.path.dirname(include): - error( - filename, - linenum, - "build/include", - 4, - "Do not include ." + extension + " files from other packages", - ) - return - - # We DO want to include a 3rd party looking header if it matches the - # filename. Otherwise we get an erroneous error "...should include its - # header" error later. - third_src_header = False - for ext in GetHeaderExtensions(): - basefilename = filename[0 : len(filename) - len(fileinfo.Extension())] - headerfile = basefilename + "." + ext - headername = FileInfo(headerfile).RepositoryName() - if headername in include or include in headername: - third_src_header = True - break - - if third_src_header or not _THIRD_PARTY_HEADERS_PATTERN.match(include): - include_state.include_list[-1].append((include, linenum)) - - # We want to ensure that headers appear in the right order: - # 1) for foo.cc, foo.h (preferred location) - # 2) c system files - # 3) cpp system files - # 4) for foo.cc, foo.h (deprecated location) - # 5) other google headers - # - # We classify each include statement as one of those 5 types - # using a number of techniques. The include_state object keeps - # track of the highest type seen, and complains if we see a - # lower type after that. - error_message = include_state.CheckNextIncludeOrder( - _ClassifyInclude(fileinfo, include, is_system) - ) - if error_message: - error( - filename, - linenum, - "build/include_order", - 4, - "%s. Should be: %s.h, c system, c++ system, other." 
- % (error_message, fileinfo.BaseName()), - ) - canonical_include = include_state.CanonicalizeAlphabeticalOrder(include) - if not include_state.IsInAlphabeticalOrder( - clean_lines, linenum, canonical_include - ): - error( - filename, - linenum, - "build/include_alpha", - 4, - 'Include "%s" not in alphabetical order' % include, - ) - include_state.SetLastHeader(canonical_include) - - -def _GetTextInside(text, start_pattern): - r"""Retrieves all the text between matching open and close parentheses. - - Given a string of lines and a regular expression string, retrieve all the text - following the expression and between opening punctuation symbols like - (, [, or {, and the matching close-punctuation symbol. This properly nested - occurrences of the punctuations, so for the text like - printf(a(), b(c())); - a call to _GetTextInside(text, r'printf\(') will return 'a(), b(c())'. - start_pattern must match string having an open punctuation symbol at the end. - - Args: - text: The lines to extract text. Its comments and strings must be elided. - It can be single line and can span multiple lines. - start_pattern: The regexp string indicating where to start extracting - the text. - Returns: - The extracted text. - None if either the opening string or ending punctuation could not be found. - """ - # TODO(unknown): Audit cpplint.py to see what places could be profitably - # rewritten to use _GetTextInside (and use inferior regexp matching today). - - # Give opening punctuations to get the matching close-punctuations. - matching_punctuation = {"(": ")", "{": "}", "[": "]"} - closing_punctuation = set(itervalues(matching_punctuation)) - - # Find the position to start extracting text. - match = re.search(start_pattern, text, re.M) - if not match: # start_pattern not found in text. - return None - start_position = match.end(0) - - assert start_position > 0, "start_pattern must ends with an opening punctuation." 
- assert ( - text[start_position - 1] in matching_punctuation - ), "start_pattern must ends with an opening punctuation." - # Stack of closing punctuations we expect to have in text after position. - punctuation_stack = [matching_punctuation[text[start_position - 1]]] - position = start_position - while punctuation_stack and position < len(text): - if text[position] == punctuation_stack[-1]: - punctuation_stack.pop() - elif text[position] in closing_punctuation: - # A closing punctuation without matching opening punctuations. - return None - elif text[position] in matching_punctuation: - punctuation_stack.append(matching_punctuation[text[position]]) - position += 1 - if punctuation_stack: - # Opening punctuations left without matching close-punctuations. - return None - # punctuations match. - return text[start_position : position - 1] - - -# Patterns for matching call-by-reference parameters. -# -# Supports nested templates up to 2 levels deep using this messy pattern: -# < (?: < (?: < [^<>]* -# > -# | [^<>] )* -# > -# | [^<>] )* -# > -_RE_PATTERN_IDENT = r"[_a-zA-Z]\w*" # =~ [[:alpha:]][[:alnum:]]* -_RE_PATTERN_TYPE = ( - r"(?:const\s+)?(?:typename\s+|class\s+|struct\s+|union\s+|enum\s+)?" - r"(?:\w|" - r"\s*<(?:<(?:<[^<>]*>|[^<>])*>|[^<>])*>|" - r"::)+" -) -# A call-by-reference parameter ends with '& identifier'. -_RE_PATTERN_REF_PARAM = re.compile( - r"(" + _RE_PATTERN_TYPE + r"(?:\s*(?:\bconst\b|[*]))*\s*" - r"&\s*" + _RE_PATTERN_IDENT + r")\s*(?:=[^,()]+)?[,)]" -) -# A call-by-const-reference parameter either ends with 'const& identifier' -# or looks like 'const type& identifier' when 'type' is atomic. -_RE_PATTERN_CONST_REF_PARAM = ( - r"(?:.*\s*\bconst\s*&\s*" - + _RE_PATTERN_IDENT - + r"|const\s+" - + _RE_PATTERN_TYPE - + r"\s*&\s*" - + _RE_PATTERN_IDENT - + r")" -) -# Stream types. 
-_RE_PATTERN_REF_STREAM_PARAM = r"(?:.*stream\s*&\s*" + _RE_PATTERN_IDENT + r")" - - -def CheckLanguage( - filename, clean_lines, linenum, file_extension, include_state, nesting_state, error -): - """Checks rules from the 'C++ language rules' section of cppguide.html. - - Some of these rules are hard to test (function overloading, using - uint32 inappropriately), but we do the best we can. - - Args: - filename: The name of the current file. - clean_lines: A CleansedLines instance containing the file. - linenum: The number of the line to check. - file_extension: The extension (without the dot) of the filename. - include_state: An _IncludeState instance in which the headers are inserted. - nesting_state: A NestingState instance which maintains information about - the current stack of nested blocks being parsed. - error: The function to call with any errors found. - """ - # If the line is empty or consists of entirely a comment, no need to - # check it. - line = clean_lines.elided[linenum] - if not line: - return - - match = _RE_PATTERN_INCLUDE.search(line) - if match: - CheckIncludeLine(filename, clean_lines, linenum, include_state, error) - return - - # Reset include state across preprocessor directives. This is meant - # to silence warnings for conditional includes. - match = Match(r"^\s*#\s*(if|ifdef|ifndef|elif|else|endif)\b", line) - if match: - include_state.ResetSection(match.group(1)) - - # Perform other checks now that we are sure that this is not an include line - CheckCasts(filename, clean_lines, linenum, error) - CheckGlobalStatic(filename, clean_lines, linenum, error) - CheckPrintf(filename, clean_lines, linenum, error) - - if IsHeaderExtension(file_extension): - # TODO(unknown): check that 1-arg constructors are explicit. - # How to tell it's a constructor? 
- # (handled in CheckForNonStandardConstructs for now) - # TODO(unknown): check that classes declare or disable copy/assign - # (level 1 error) - pass - - # Check if people are using the verboten C basic types. The only exception - # we regularly allow is "unsigned short port" for port - # or if it's used in a static assert - if Search(r"\bstatic_assert\b", line): - pass - elif Search(r"\bshort port\b", line): - if not Search(r"\bunsigned short port\b", line): - error( - filename, - linenum, - "runtime/int", - 4, - 'Use "unsigned short" for ports, not "short"', - ) - else: - match = Search(r"\b(short|long(?! +double)|long long)\b", line) - if match: - error( - filename, - linenum, - "runtime/int", - 4, - "Use int16/int64/etc, rather than the C type %s" % match.group(1), - ) - - # Check if some verboten operator overloading is going on - # TODO(unknown): catch out-of-line unary operator&: - # class X {}; - # int operator&(const X& x) { return 42; } // unary operator& - # The trick is it's hard to tell apart from binary operator&: - # class Y { int operator&(const Y& x) { return 23; } }; // binary operator& - if Search(r"\boperator\s*&\s*\(\s*\)", line): - error( - filename, - linenum, - "runtime/operator", - 4, - "Unary operator& is dangerous. Do not use it.", - ) - - # Check for suspicious usage of "if" like - # } if (a == b) { - if Search(r"\}\s*if\s*\(", line): - error( - filename, - linenum, - "readability/braces", - 4, - 'Did you mean "else if"? If not, start a new line for "if".', - ) - - # Check for potential format string bugs like printf(foo). - # We constrain the pattern not to pick things like DocidForPrintf(foo). - # Not perfect but it can catch printf(foo.c_str()) and printf(foo->c_str()) - # TODO(unknown): Catch the following case. Need to change the calling - # convention of the whole function to process multiple line to handle it. 
- # printf( - # boy_this_is_a_really_long_variable_that_cannot_fit_on_the_prev_line); - printf_args = _GetTextInside(line, r"(?i)\b(string)?printf\s*\(") - if printf_args: - match = Match(r"([\w.\->()]+)$", printf_args) - if match and match.group(1) != "__VA_ARGS__": - function_name = re.search(r"\b((?:string)?printf)\s*\(", line, re.I).group( - 1 - ) - error( - filename, - linenum, - "runtime/printf", - 4, - 'Potential format string bug. Do %s("%%s", %s) instead.' - % (function_name, match.group(1)), - ) - - # Check for potential memset bugs like memset(buf, sizeof(buf), 0). - match = Search(r"memset\s*\(([^,]*),\s*([^,]*),\s*0\s*\)", line) - if match and not Match(r"^''|-?[0-9]+|0x[0-9A-Fa-f]$", match.group(2)): - error( - filename, - linenum, - "runtime/memset", - 4, - 'Did you mean "memset(%s, 0, %s)"?' % (match.group(1), match.group(2)), - ) - - if Search(r"\busing namespace\b", line): - if Search(r"\bliterals\b", line): - error( - filename, - linenum, - "build/namespaces_literals", - 5, - "Do not use namespace using-directives. " - "Use using-declarations instead.", - ) - else: - error( - filename, - linenum, - "build/namespaces", - 5, - "Do not use namespace using-directives. " - "Use using-declarations instead.", - ) - - # Detect variable-length arrays. - match = Match(r"\s*(.+::)?(\w+) [a-z]\w*\[(.+)];", line) - if ( - match - and match.group(2) != "return" - and match.group(2) != "delete" - and match.group(3).find("]") == -1 - ): - # Split the size using space and arithmetic operators as delimiters. - # If any of the resulting tokens are not compile time constants then - # report the error. 
- tokens = re.split(r"\s|\+|\-|\*|\/|<<|>>]", match.group(3)) - is_const = True - skip_next = False - for tok in tokens: - if skip_next: - skip_next = False - continue - - if Search(r"sizeof\(.+\)", tok): - continue - if Search(r"arraysize\(\w+\)", tok): - continue - - tok = tok.lstrip("(") - tok = tok.rstrip(")") - if not tok: - continue - if Match(r"\d+", tok): - continue - if Match(r"0[xX][0-9a-fA-F]+", tok): - continue - if Match(r"k[A-Z0-9]\w*", tok): - continue - if Match(r"(.+::)?k[A-Z0-9]\w*", tok): - continue - if Match(r"(.+::)?[A-Z][A-Z0-9_]*", tok): - continue - # A catch all for tricky sizeof cases, including 'sizeof expression', - # 'sizeof(*type)', 'sizeof(const type)', 'sizeof(struct StructName)' - # requires skipping the next token because we split on ' ' and '*'. - if tok.startswith("sizeof"): - skip_next = True - continue - is_const = False - break - if not is_const: - error( - filename, - linenum, - "runtime/arrays", - 1, - "Do not use variable-length arrays. Use an appropriately named " - "('k' followed by CamelCase) compile-time constant for the size.", - ) - - # Check for use of unnamed namespaces in header files. Registration - # macros are typically OK, so we allow use of "namespace {" on lines - # that end with backslashes. - if ( - IsHeaderExtension(file_extension) - and Search(r"\bnamespace\s*{", line) - and line[-1] != "\\" - ): - error( - filename, - linenum, - "build/namespaces", - 4, - "Do not use unnamed namespaces in header files. See " - "https://google-styleguide.googlecode.com/svn/trunk/cppguide.xml#Namespaces" - " for more information.", - ) - - -def CheckGlobalStatic(filename, clean_lines, linenum, error): - """Check for unsafe global or static objects. - - Args: - filename: The name of the current file. - clean_lines: A CleansedLines instance containing the file. - linenum: The number of the line to check. - error: The function to call with any errors found. 
- """ - line = clean_lines.elided[linenum] - - # Match two lines at a time to support multiline declarations - if linenum + 1 < clean_lines.NumLines() and not Search(r"[;({]", line): - line += clean_lines.elided[linenum + 1].strip() - - # Check for people declaring static/global STL strings at the top level. - # This is dangerous because the C++ language does not guarantee that - # globals with constructors are initialized before the first access, and - # also because globals can be destroyed when some threads are still running. - # TODO(unknown): Generalize this to also find static unique_ptr instances. - # TODO(unknown): File bugs for clang-tidy to find these. - match = Match( - r"((?:|static +)(?:|const +))(?::*std::)?string( +const)? +" - r"([a-zA-Z0-9_:]+)\b(.*)", - line, - ) - - # Remove false positives: - # - String pointers (as opposed to values). - # string *pointer - # const string *pointer - # string const *pointer - # string *const pointer - # - # - Functions and template specializations. - # string Function(... - # string Class::Method(... - # - # - Operators. These are matched separately because operator names - # cross non-word boundaries, and trying to match both operators - # and functions at the same time would decrease accuracy of - # matching identifiers. - # string Class::operator*() - if ( - match - and not Search(r"\bstring\b(\s+const)?\s*[\*\&]\s*(const\s+)?\w", line) - and not Search(r"\boperator\W", line) - and not Match(r'\s*(<.*>)?(::[a-zA-Z0-9_]+)*\s*\(([^"]|$)', match.group(4)) - ): - if Search(r"\bconst\b", line): - error( - filename, - linenum, - "runtime/string", - 4, - "For a static/global string constant, use a C style string " - 'instead: "%schar%s %s[]".' 
- % (match.group(1), match.group(2) or "", match.group(3)), - ) - else: - error( - filename, - linenum, - "runtime/string", - 4, - "Static/global string variables are not permitted.", - ) - - if Search(r"\b([A-Za-z0-9_]*_)\(\1\)", line) or Search( - r"\b([A-Za-z0-9_]*_)\(CHECK_NOTNULL\(\1\)\)", line - ): - error( - filename, - linenum, - "runtime/init", - 4, - "You seem to be initializing a member variable with itself.", - ) - - -def CheckPrintf(filename, clean_lines, linenum, error): - """Check for printf related issues. - - Args: - filename: The name of the current file. - clean_lines: A CleansedLines instance containing the file. - linenum: The number of the line to check. - error: The function to call with any errors found. - """ - line = clean_lines.elided[linenum] - - # When snprintf is used, the second argument shouldn't be a literal. - match = Search(r"snprintf\s*\(([^,]*),\s*([0-9]*)\s*,", line) - if match and match.group(2) != "0": - # If 2nd arg is zero, snprintf is used to calculate size. - error( - filename, - linenum, - "runtime/printf", - 3, - "If you can, use sizeof(%s) instead of %s as the 2nd arg " - "to snprintf." % (match.group(1), match.group(2)), - ) - - # Check if some verboten C functions are being used. - if Search(r"\bsprintf\s*\(", line): - error( - filename, - linenum, - "runtime/printf", - 5, - "Never use sprintf. Use snprintf instead.", - ) - match = Search(r"\b(strcpy|strcat)\s*\(", line) - if match: - error( - filename, - linenum, - "runtime/printf", - 4, - "Almost always, snprintf is better than %s" % match.group(1), - ) - - -def IsDerivedFunction(clean_lines, linenum): - """Check if current line contains an inherited function. - - Args: - clean_lines: A CleansedLines instance containing the file. - linenum: The number of the line to check. - Returns: - True if current line contains a function with "override" - virt-specifier. 
- """ - # Scan back a few lines for start of current function - for i in xrange(linenum, max(-1, linenum - 10), -1): - match = Match(r"^([^()]*\w+)\(", clean_lines.elided[i]) - if match: - # Look for "override" after the matching closing parenthesis - line, _, closing_paren = CloseExpression( - clean_lines, i, len(match.group(1)) - ) - return closing_paren >= 0 and Search(r"\boverride\b", line[closing_paren:]) - return False - - -def IsOutOfLineMethodDefinition(clean_lines, linenum): - """Check if current line contains an out-of-line method definition. - - Args: - clean_lines: A CleansedLines instance containing the file. - linenum: The number of the line to check. - Returns: - True if current line contains an out-of-line method definition. - """ - # Scan back a few lines for start of current function - for i in xrange(linenum, max(-1, linenum - 10), -1): - if Match(r"^([^()]*\w+)\(", clean_lines.elided[i]): - return Match(r"^[^()]*\w+::\w+\(", clean_lines.elided[i]) is not None - return False - - -def IsInitializerList(clean_lines, linenum): - """Check if current line is inside constructor initializer list. - - Args: - clean_lines: A CleansedLines instance containing the file. - linenum: The number of the line to check. - Returns: - True if current line appears to be inside constructor initializer - list, False otherwise. - """ - for i in xrange(linenum, 1, -1): - line = clean_lines.elided[i] - if i == linenum: - remove_function_body = Match(r"^(.*)\{\s*$", line) - if remove_function_body: - line = remove_function_body.group(1) - - if Search(r"\s:\s*\w+[({]", line): - # A lone colon tend to indicate the start of a constructor - # initializer list. It could also be a ternary operator, which - # also tend to appear in constructor initializer lists as - # opposed to parameter lists. - return True - if Search(r"\}\s*,\s*$", line): - # A closing brace followed by a comma is probably the end of a - # brace-initialized member in constructor initializer list. 
- return True - if Search(r"[{};]\s*$", line): - # Found one of the following: - # - A closing brace or semicolon, probably the end of the previous - # function. - # - An opening brace, probably the start of current class or namespace. - # - # Current line is probably not inside an initializer list since - # we saw one of those things without seeing the starting colon. - return False - - # Got to the beginning of the file without seeing the start of - # constructor initializer list. - return False - - -def CheckForNonConstReference(filename, clean_lines, linenum, nesting_state, error): - """Check for non-const references. - - Separate from CheckLanguage since it scans backwards from current - line, instead of scanning forward. - - Args: - filename: The name of the current file. - clean_lines: A CleansedLines instance containing the file. - linenum: The number of the line to check. - nesting_state: A NestingState instance which maintains information about - the current stack of nested blocks being parsed. - error: The function to call with any errors found. - """ - # Do nothing if there is no '&' on current line. - line = clean_lines.elided[linenum] - if "&" not in line: - return - - # If a function is inherited, current function doesn't have much of - # a choice, so any non-const references should not be blamed on - # derived function. - if IsDerivedFunction(clean_lines, linenum): - return - - # Don't warn on out-of-line method definitions, as we would warn on the - # in-line declaration, if it isn't marked with 'override'. 
- if IsOutOfLineMethodDefinition(clean_lines, linenum): - return - - # Long type names may be broken across multiple lines, usually in one - # of these forms: - # LongType - # ::LongTypeContinued &identifier - # LongType:: - # LongTypeContinued &identifier - # LongType< - # ...>::LongTypeContinued &identifier - # - # If we detected a type split across two lines, join the previous - # line to current line so that we can match const references - # accordingly. - # - # Note that this only scans back one line, since scanning back - # arbitrary number of lines would be expensive. If you have a type - # that spans more than 2 lines, please use a typedef. - if linenum > 1: - previous = None - if Match(r"\s*::(?:[\w<>]|::)+\s*&\s*\S", line): - # previous_line\n + ::current_line - previous = Search( - r"\b((?:const\s*)?(?:[\w<>]|::)+[\w<>])\s*$", - clean_lines.elided[linenum - 1], - ) - elif Match(r"\s*[a-zA-Z_]([\w<>]|::)+\s*&\s*\S", line): - # previous_line::\n + current_line - previous = Search( - r"\b((?:const\s*)?(?:[\w<>]|::)+::)\s*$", - clean_lines.elided[linenum - 1], - ) - if previous: - line = previous.group(1) + line.lstrip() - else: - # Check for templated parameter that is split across multiple lines - endpos = line.rfind(">") - if endpos > -1: - (_, startline, startpos) = ReverseCloseExpression( - clean_lines, linenum, endpos - ) - if startpos > -1 and startline < linenum: - # Found the matching < on an earlier line, collect all - # pieces up to current line. - line = "" - for i in xrange(startline, linenum + 1): - line += clean_lines.elided[i].strip() - - # Check for non-const references in function parameters. 
A single '&' may - # found in the following places: - # inside expression: binary & for bitwise AND - # inside expression: unary & for taking the address of something - # inside declarators: reference parameter - # We will exclude the first two cases by checking that we are not inside a - # function body, including one that was just introduced by a trailing '{'. - # TODO(unknown): Doesn't account for 'catch(Exception& e)' [rare]. - if nesting_state.previous_stack_top and not ( - isinstance(nesting_state.previous_stack_top, _ClassInfo) - or isinstance(nesting_state.previous_stack_top, _NamespaceInfo) - ): - # Not at toplevel, not within a class, and not within a namespace - return - - # Avoid initializer lists. We only need to scan back from the - # current line for something that starts with ':'. - # - # We don't need to check the current line, since the '&' would - # appear inside the second set of parentheses on the current line as - # opposed to the first set. - if linenum > 0: - for i in xrange(linenum - 1, max(0, linenum - 10), -1): - previous_line = clean_lines.elided[i] - if not Search(r"[),]\s*$", previous_line): - break - if Match(r"^\s*:\s+\S", previous_line): - return - - # Avoid preprocessors - if Search(r"\\\s*$", line): - return - - # Avoid constructor initializer lists - if IsInitializerList(clean_lines, linenum): - return - - # We allow non-const references in a few standard places, like functions - # called "swap()" or iostream operators like "<<" or ">>". Do not check - # those function parameters. - # - # We also accept & in static_assert, which looks like a function but - # it's actually a declaration expression. - whitelisted_functions = ( - r"(?:[sS]wap(?:<\w:+>)?|" - r"operator\s*[<>][<>]|" - r"static_assert|COMPILE_ASSERT" - r")\s*\(" - ) - if Search(whitelisted_functions, line): - return - elif not Search(r"\S+\([^)]*$", line): - # Don't see a whitelisted function on this line. 
Actually we - # didn't see any function name on this line, so this is likely a - # multi-line parameter list. Try a bit harder to catch this case. - for i in xrange(2): - if linenum > i and Search( - whitelisted_functions, clean_lines.elided[linenum - i - 1] - ): - return - - decls = ReplaceAll(r"{[^}]*}", " ", line) # exclude function body - for parameter in re.findall(_RE_PATTERN_REF_PARAM, decls): - if not Match(_RE_PATTERN_CONST_REF_PARAM, parameter) and not Match( - _RE_PATTERN_REF_STREAM_PARAM, parameter - ): - error( - filename, - linenum, - "runtime/references", - 2, - "Is this a non-const reference? " - "If so, make const or use a pointer: " - + ReplaceAll(" *<", "<", parameter), - ) - - -def CheckCasts(filename, clean_lines, linenum, error): - """Various cast related checks. - - Args: - filename: The name of the current file. - clean_lines: A CleansedLines instance containing the file. - linenum: The number of the line to check. - error: The function to call with any errors found. - """ - line = clean_lines.elided[linenum] +def FindCheckMacro(line): + """Find a replaceable CHECK-like macro. + + Args: + line: line to search on. + Returns: + (macro name, start position), or (None, -1) if no replaceable + macro is found. + """ + for macro in _CHECK_MACROS: + i = line.find(macro) + if i >= 0: + # Find opening parenthesis. Do a regular expression match here + # to make sure that we are matching the expected CHECK macro, as + # opposed to some other macro that happens to contain the CHECK + # substring. + matched = re.match(r'^(.*\b' + macro + r'\s*)\(', line) + if not matched: + continue + return (macro, len(matched.group(1))) + return (None, -1) - # Check to see if they're using an conversion function cast. - # I just try to capture the most common basic types, though there are more. - # Parameterless conversion functions, such as bool(), are allowed as they are - # probably a member operator declaration or default constructor. 
- match = Search( - r"(\bnew\s+(?:const\s+)?|\S<\s*(?:const\s+)?)?\b" - r"(int|float|double|bool|char|int32|uint32|int64|uint64)" - r"(\([^)].*)", - line, - ) - expecting_function = ExpectingFunctionArgs(clean_lines, linenum) - if match and not expecting_function: - matched_type = match.group(2) - # matched_new_or_template is used to silence two false positives: - # - New operators - # - Template arguments with function types - # - # For template arguments, we match on types immediately following - # an opening bracket without any spaces. This is a fast way to - # silence the common case where the function type is the first - # template argument. False negative with less-than comparison is - # avoided because those operators are usually followed by a space. +def CheckCheck(filename, clean_lines, linenum, error): + """Checks the use of CHECK and EXPECT macros. + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + error: The function to call with any errors found. + """ + + # Decide the set of replacement macros that should be suggested + lines = clean_lines.elided + (check_macro, start_pos) = FindCheckMacro(lines[linenum]) + if not check_macro: + return + + # Find end of the boolean expression by matching parentheses + (last_line, end_line, end_pos) = CloseExpression( + clean_lines, linenum, start_pos) + if end_pos < 0: + return + + # If the check macro is followed by something other than a + # semicolon, assume users will log their own custom error messages + # and don't suggest any replacements. + if not re.match(r'\s*;', last_line[end_pos:]): + return + + if linenum == end_line: + expression = lines[linenum][start_pos + 1:end_pos - 1] + else: + expression = lines[linenum][start_pos + 1:] + for i in range(linenum + 1, end_line): + expression += lines[i] + expression += last_line[0:end_pos - 1] + + # Parse expression so that we can take parentheses into account. 
+ # This avoids false positives for inputs like "CHECK((a < 4) == b)", + # which is not replaceable by CHECK_LE. + lhs = '' + rhs = '' + operator = None + while expression: + matched = re.match(r'^\s*(<<|<<=|>>|>>=|->\*|->|&&|\|\||' + r'==|!=|>=|>|<=|<|\()(.*)$', expression) + if matched: + token = matched.group(1) + if token == '(': + # Parenthesized operand + expression = matched.group(2) + (end, _) = FindEndOfExpressionInLine(expression, 0, ['(']) + if end < 0: + return # Unmatched parenthesis + lhs += '(' + expression[0:end] + expression = expression[end:] + elif token in ('&&', '||'): + # Logical and/or operators. This means the expression + # contains more than one term, for example: + # CHECK(42 < a && a < b); # - # function // bracket + no space = false positive - # value < double(42) // bracket + space = true positive - matched_new_or_template = match.group(1) - - # Avoid arrays by looking for brackets that come after the closing - # parenthesis. - if Match(r"\([^()]+\)\s*\[", match.group(3)): - return - - # Other things to ignore: - # - Function pointers - # - Casts to pointer types - # - Placement new - # - Alias declarations - matched_funcptr = match.group(3) - if ( - matched_new_or_template is None - and not ( - matched_funcptr - and ( - Match(r"\((?:[^() ]+::\s*\*\s*)?[^() ]+\)\s*\(", matched_funcptr) - or matched_funcptr.startswith("(*)") - ) - ) - and not Match(r"\s*using\s+\S+\s*=\s*" + matched_type, line) - and not Search(r"new\(\S+\)\s*" + matched_type, line) - ): - error( - filename, - linenum, - "readability/casting", - 4, - "Using deprecated casting style. " - "Use static_cast<%s>(...) instead" % matched_type, - ) - - if not expecting_function: - CheckCStyleCast( - filename, - clean_lines, - linenum, - "static_cast", - r"\((int|float|double|bool|char|u?int(16|32|64))\)", - error, - ) - - # This doesn't catch all cases. Consider (const char * const)"hello". 
- # - # (char *) "foo" should always be a const_cast (reinterpret_cast won't - # compile). - if CheckCStyleCast( - filename, clean_lines, linenum, "const_cast", r'\((char\s?\*+\s?)\)\s*"', error - ): - pass - else: - # Check pointer casts for other than string constants - CheckCStyleCast( - filename, - clean_lines, - linenum, - "reinterpret_cast", - r"\((\w+\s?\*+\s?)\)", - error, - ) - - # In addition, we look for people taking the address of a cast. This - # is dangerous -- casts can assign to temporaries, so the pointer doesn't - # point where you think. - # - # Some non-identifier character is required before the '&' for the - # expression to be recognized as a cast. These are casts: - # expression = &static_cast(temporary()); - # function(&(int*)(temporary())); - # - # This is not a cast: - # reference_type&(int* function_param); - match = Search( - r"(?:[^\w]&\(([^)*][^)]*)\)[\w(])|" - r"(?:[^\w]&(static|dynamic|down|reinterpret)_cast\b)", - line, - ) - if match: - # Try a better error message when the & is bound to something - # dereferenced by the casted pointer, as opposed to the casted - # pointer itself. - parenthesis_error = False - match = Match(r"^(.*&(?:static|dynamic|down|reinterpret)_cast\b)<", line) - if match: - _, y1, x1 = CloseExpression(clean_lines, linenum, len(match.group(1))) - if x1 >= 0 and clean_lines.elided[y1][x1] == "(": - _, y2, x2 = CloseExpression(clean_lines, y1, x1) - if x2 >= 0: - extended_line = clean_lines.elided[y2][x2:] - if y2 < clean_lines.NumLines() - 1: - extended_line += clean_lines.elided[y2 + 1] - if Match(r"\s*(?:->|\[)", extended_line): - parenthesis_error = True - - if parenthesis_error: - error( - filename, - linenum, - "readability/casting", - 4, - ( - "Are you taking an address of something dereferenced " - "from a cast? 
Wrapping the dereferenced expression in " - "parentheses will make the binding more obvious" - ), - ) - else: - error( - filename, - linenum, - "runtime/casting", - 4, - ( - "Are you taking an address of a cast? " - "This is dangerous: could be a temp var. " - "Take the address before doing the cast, rather than after" - ), - ) - - -def CheckCStyleCast(filename, clean_lines, linenum, cast_type, pattern, error): - """Checks for a C-style cast by looking for the pattern. - - Args: - filename: The name of the current file. - clean_lines: A CleansedLines instance containing the file. - linenum: The number of the line to check. - cast_type: The string for the C++ cast to recommend. This is either - reinterpret_cast, static_cast, or const_cast, depending. - pattern: The regular expression used to find C-style casts. - error: The function to call with any errors found. - - Returns: - True if an error was emitted. - False otherwise. - """ - line = clean_lines.elided[linenum] - match = Search(pattern, line) - if not match: - return False - - # Exclude lines with keywords that tend to look like casts - context = line[0 : match.start(1) - 1] - if Match(r".*\b(?:sizeof|alignof|alignas|[_A-Z][_A-Z0-9]*)\s*$", context): - return False - - # Try expanding current context to see if we one level of - # parentheses inside a macro. - if linenum > 0: - for i in xrange(linenum - 1, max(0, linenum - 5), -1): - context = clean_lines.elided[i] + context - if Match(r".*\b[_A-Z][_A-Z0-9]*\s*\((?:\([^()]*\)|[^()])*$", context): - return False - - # operator++(int) and operator--(int) - if context.endswith(" operator++") or context.endswith(" operator--"): - return False - - # A single unnamed argument for a function tends to look like old style cast. - # If we see those, don't issue warnings for deprecated casts. 
- remainder = line[match.end(0) :] - if Match(r"^\s*(?:;|const\b|throw\b|final\b|override\b|[=>{),]|->)", remainder): - return False - - # At this point, all that should be left is actual casts. - error( - filename, - linenum, - "readability/casting", - 4, - "Using C-style cast. Use %s<%s>(...) instead" % (cast_type, match.group(1)), - ) - - return True - - -def ExpectingFunctionArgs(clean_lines, linenum): - """Checks whether where function type arguments are expected. - - Args: - clean_lines: A CleansedLines instance containing the file. - linenum: The number of the line to check. - - Returns: - True if the line at 'linenum' is inside something that expects arguments - of function types. - """ - line = clean_lines.elided[linenum] - return Match(r"^\s*MOCK_(CONST_)?METHOD\d+(_T)?\(", line) or ( - linenum >= 2 - and ( - Match( - r"^\s*MOCK_(?:CONST_)?METHOD\d+(?:_T)?\((?:\S+,)?\s*$", - clean_lines.elided[linenum - 1], - ) - or Match( - r"^\s*MOCK_(?:CONST_)?METHOD\d+(?:_T)?\(\s*$", - clean_lines.elided[linenum - 2], - ) - or Search(r"\bstd::m?function\s*\<\s*$", clean_lines.elided[linenum - 1]) - ) - ) - - -_HEADERS_CONTAINING_TEMPLATES = ( - ("", ("deque",)), - ( - "", - ( - "unary_function", - "binary_function", - "plus", - "minus", - "multiplies", - "divides", - "modulus", - "negate", - "equal_to", - "not_equal_to", - "greater", - "less", - "greater_equal", - "less_equal", - "logical_and", - "logical_or", - "logical_not", - "unary_negate", - "not1", - "binary_negate", - "not2", - "bind1st", - "bind2nd", - "pointer_to_unary_function", - "pointer_to_binary_function", - "ptr_fun", - "mem_fun_t", - "mem_fun", - "mem_fun1_t", - "mem_fun1_ref_t", - "mem_fun_ref_t", - "const_mem_fun_t", - "const_mem_fun1_t", - "const_mem_fun_ref_t", - "const_mem_fun1_ref_t", - "mem_fun_ref", - ), - ), - ("", ("numeric_limits",)), - ("", ("list",)), - ("", ("multimap",)), - ( - "", - ( - "allocator", - "make_shared", - "make_unique", - "shared_ptr", - "unique_ptr", - "weak_ptr", - ), - 
), - ( - "", - ( - "queue", - "priority_queue", - ), - ), - ("", ("multiset",)), - ("", ("stack",)), - ( - "", - ( - "char_traits", - "basic_string", - ), - ), - ("", ("tuple",)), - ("", ("unordered_map", "unordered_multimap")), - ("", ("unordered_set", "unordered_multiset")), - ("", ("pair",)), - ("", ("vector",)), - # gcc extensions. - # Note: std::hash is their hash, ::hash is our hash - ( - "", - ( - "hash_map", - "hash_multimap", - ), - ), - ( - "", - ( - "hash_set", - "hash_multiset", - ), - ), - ("", ("slist",)), -) - -_HEADERS_MAYBE_TEMPLATES = ( - ( - "", - ( - "copy", - "max", - "min", - "min_element", - "sort", - "transform", - ), - ), - ("", ("forward", "make_pair", "move", "swap")), -) - -_RE_PATTERN_STRING = re.compile(r"\bstring\b") - -_re_pattern_headers_maybe_templates = [] -for _header, _templates in _HEADERS_MAYBE_TEMPLATES: - for _template in _templates: - # Match max(..., ...), max(..., ...), but not foo->max, foo.max or - # 'type::max()'. - _re_pattern_headers_maybe_templates.append( - ( - re.compile(r"[^>.]\b" + _template + r"(<.*?>)?\([^\)]"), - _template, - _header, - ) - ) -# Match set, but not foo->set, foo.set -_re_pattern_headers_maybe_templates.append( - (re.compile(r"[^>.]\bset\s*\<"), "set<>", "") -) -# Match 'map var' and 'std::map(...)', but not 'map(...)'' -_re_pattern_headers_maybe_templates.append( - (re.compile(r"(std\b::\bmap\s*\<)|(^(std\b::\b)map\b\(\s*\<)"), "map<>", "") -) - -# Other scripts may reach in and modify this pattern. -_re_pattern_templates = [] -for _header, _templates in _HEADERS_CONTAINING_TEMPLATES: - for _template in _templates: - _re_pattern_templates.append( - (re.compile(r"(\<|\b)" + _template + r"\s*\<"), _template + "<>", _header) - ) - - -def FilesBelongToSameModule(filename_cc, filename_h): - """Check if these two filenames belong to the same module. 
- - The concept of a 'module' here is a as follows: - foo.h, foo-inl.h, foo.cc, foo_test.cc and foo_unittest.cc belong to the - same 'module' if they are in the same directory. - some/path/public/xyzzy and some/path/internal/xyzzy are also considered - to belong to the same module here. + # These are not replaceable with CHECK_LE, so bail out early. + return + elif token in ('<<', '<<=', '>>', '>>=', '->*', '->'): + # Non-relational operator + lhs += token + expression = matched.group(2) + else: + # Relational operator + operator = token + rhs = matched.group(2) + break + else: + # Unparenthesized operand. Instead of appending to lhs one character + # at a time, we do another regular expression match to consume several + # characters at once if possible. Trivial benchmark shows that this + # is more efficient when the operands are longer than a single + # character, which is generally the case. + matched = re.match(r'^([^-=!<>()&|]+)(.*)$', expression) + if not matched: + matched = re.match(r'^(\s*\S)(.*)$', expression) + if not matched: + break + lhs += matched.group(1) + expression = matched.group(2) + + # Only apply checks if we got all parts of the boolean expression + if not (lhs and operator and rhs): + return + + # Check that rhs do not contain logical operators. We already know + # that lhs is fine since the loop above parses out && and ||. + if rhs.find('&&') > -1 or rhs.find('||') > -1: + return + + # At least one of the operands must be a constant literal. This is + # to avoid suggesting replacements for unprintable things like + # CHECK(variable != iterator) + # + # The following pattern matches decimal, hex integers, strings, and + # characters (in that order). 
+ lhs = lhs.strip() + rhs = rhs.strip() + match_constant = r'^([-+]?(\d+|0[xX][0-9a-fA-F]+)[lLuU]{0,3}|".*"|\'.*\')$' + if re.match(match_constant, lhs) or re.match(match_constant, rhs): + # Note: since we know both lhs and rhs, we can provide a more + # descriptive error message like: + # Consider using CHECK_EQ(x, 42) instead of CHECK(x == 42) + # Instead of: + # Consider using CHECK_EQ instead of CHECK(a == b) + # + # We are still keeping the less descriptive message because if lhs + # or rhs gets long, the error message might become unreadable. + error(filename, linenum, 'readability/check', 2, + f'Consider using {_CHECK_REPLACEMENT[check_macro][operator]}' + f' instead of {check_macro}(a {operator} b)') - If the filename_cc contains a longer path than the filename_h, for example, - '/absolute/path/to/base/sysinfo.cc', and this file would include - 'base/sysinfo.h', this function also produces the prefix needed to open the - header. This is used by the caller of this function to more robustly open the - header file. We don't have access to the real include paths in this context, - so we need this guesswork here. - Known bugs: tools/base/bar.cc and base/bar.h belong to the same module - according to this implementation. Because of this, this function gives - some false positives. This should be sufficiently rare in practice. +def CheckAltTokens(filename, clean_lines, linenum, error): + """Check alternative keywords being used in boolean expressions. + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + error: The function to call with any errors found. + """ + line = clean_lines.elided[linenum] + + # Avoid preprocessor lines + if re.match(r'^\s*#', line): + return + + # Last ditch effort to avoid multi-line comments. 
This will not help + # if the comment started before the current line or ended after the + # current line, but it catches most of the false positives. At least, + # it provides a way to workaround this warning for people who use + # multi-line comments in preprocessor macros. + # + # TODO(unknown): remove this once cpplint has better support for + # multi-line comments. + if line.find('/*') >= 0 or line.find('*/') >= 0: + return + + for match in _ALT_TOKEN_REPLACEMENT_PATTERN.finditer(line): + error(filename, linenum, 'readability/alt_tokens', 2, + f'Use operator {_ALT_TOKEN_REPLACEMENT[match.group(2)]}' + f' instead of {match.group(2)}') - Args: - filename_cc: is the path for the source (e.g. .cc) file - filename_h: is the path for the header path - Returns: - Tuple with a bool and a string: - bool: True if filename_cc and filename_h belong to the same module. - string: the additional prefix needed to open the header file. - """ - fileinfo_cc = FileInfo(filename_cc) - if not fileinfo_cc.Extension().lstrip(".") in GetNonHeaderExtensions(): - return (False, "") +def GetLineWidth(line): + """Determines the width of the line in column positions. + + Args: + line: A string, which may be a Unicode string. + + Returns: + The width of the line in column positions, accounting for Unicode + combining characters and wide characters. 
+ """ + if isinstance(line, str): + width = 0 + for uc in unicodedata.normalize('NFC', line): + if unicodedata.east_asian_width(uc) in ('W', 'F'): + width += 2 + elif not unicodedata.combining(uc): + # Issue 337 + # https://mail.python.org/pipermail/python-list/2012-August/628809.html + if (sys.version_info.major, sys.version_info.minor) <= (3, 2): + # https://github.com/python/cpython/blob/2.7/Include/unicodeobject.h#L81 + is_wide_build = sysconfig.get_config_var("Py_UNICODE_SIZE") >= 4 + # https://github.com/python/cpython/blob/2.7/Objects/unicodeobject.c#L564 + is_low_surrogate = 0xDC00 <= ord(uc) <= 0xDFFF + if not is_wide_build and is_low_surrogate: + width -= 1 + + width += 1 + return width + else: + return len(line) + + +def CheckStyle(filename, clean_lines, linenum, file_extension, nesting_state, + error): + """Checks rules from the 'C++ style rules' section of cppguide.html. + + Most of these rules are hard to test (naming, comment style), but we + do what we can. In particular we check for 2-space indents, line lengths, + tab usage, spaces inside code, etc. + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + file_extension: The extension (without the dot) of the filename. + nesting_state: A NestingState instance which maintains information about + the current stack of nested blocks being parsed. + error: The function to call with any errors found. + """ + + # Don't use "elided" lines here, otherwise we can't check commented lines. 
+ # Don't want to use "raw" either, because we don't want to check inside C++11 + # raw strings, + raw_lines = clean_lines.lines_without_raw_strings + line = raw_lines[linenum] + prev = raw_lines[linenum - 1] if linenum > 0 else '' + + if line.find('\t') != -1: + error(filename, linenum, 'whitespace/tab', 1, + 'Tab found; better to use spaces') + + # One or three blank spaces at the beginning of the line is weird; it's + # hard to reconcile that with 2-space indents. + # NOTE: here are the conditions rob pike used for his tests. Mine aren't + # as sophisticated, but it may be worth becoming so: RLENGTH==initial_spaces + # if(RLENGTH > 20) complain = 0; + # if(match($0, " +(error|private|public|protected):")) complain = 0; + # if(match(prev, "&& *$")) complain = 0; + # if(match(prev, "\\|\\| *$")) complain = 0; + # if(match(prev, "[\",=><] *$")) complain = 0; + # if(match($0, " <<")) complain = 0; + # if(match(prev, " +for \\(")) complain = 0; + # if(prevodd && match(prevprev, " +for \\(")) complain = 0; + scope_or_label_pattern = r'\s*(?:public|private|protected|signals)(?:\s+(?:slots\s*)?)?:\s*\\?$' + classinfo = nesting_state.InnermostClass() + initial_spaces = 0 + cleansed_line = clean_lines.elided[linenum] + while initial_spaces < len(line) and line[initial_spaces] == ' ': + initial_spaces += 1 + # There are certain situations we allow one space, notably for + # section labels, and also lines containing multi-line raw strings. + # We also don't check for lines that look like continuation lines + # (of lines ending in double quotes, commas, equals, or angle brackets) + # because the rules for how to indent those are non-trivial. + if (not re.search(r'[",=><] *$', prev) and + (initial_spaces == 1 or initial_spaces == 3) and + not re.match(scope_or_label_pattern, cleansed_line) and + not (clean_lines.raw_lines[linenum] != line and + re.match(r'^\s*""', line))): + error(filename, linenum, 'whitespace/indent', 3, + 'Weird number of spaces at line-start. 
' + 'Are you using a 2-space indent?') + + if line and line[-1].isspace(): + error(filename, linenum, 'whitespace/end_of_line', 4, + 'Line ends in whitespace. Consider deleting these extra spaces.') + + # Check if the line is a header guard. + is_header_guard = False + if IsHeaderExtension(file_extension): + cppvar = GetHeaderGuardCPPVariable(filename) + if (line.startswith(f'#ifndef {cppvar}') or + line.startswith(f'#define {cppvar}') or + line.startswith(f'#endif // {cppvar}')): + is_header_guard = True + # #include lines and header guards can be long, since there's no clean way to + # split them. + # + # URLs can be long too. It's possible to split these, but it makes them + # harder to cut&paste. + # + # The "$Id:...$" comment may also get very long without it being the + # developers fault. + # + # Doxygen documentation copying can get pretty long when using an overloaded + # function declaration + if (not line.startswith('#include') and not is_header_guard and + not re.match(r'^\s*//.*http(s?)://\S*$', line) and + not re.match(r'^\s*//\s*[^\s]*$', line) and + not re.match(r'^// \$Id:.*#[0-9]+ \$$', line) and + not re.match(r'^\s*/// [@\\](copydoc|copydetails|copybrief) .*$', line)): + line_width = GetLineWidth(line) + if line_width > _line_length: + error(filename, linenum, 'whitespace/line_length', 2, + f'Lines should be <= {_line_length} characters long') + + if (cleansed_line.count(';') > 1 and + # allow simple single line lambdas + not re.match(r'^[^{};]*\[[^\[\]]*\][^{}]*\{[^{}\n\r]*\}', + line) and + # for loops are allowed two ;'s (and may run over two lines). 
+ cleansed_line.find('for') == -1 and + (GetPreviousNonBlankLine(clean_lines, linenum)[0].find('for') == -1 or + GetPreviousNonBlankLine(clean_lines, linenum)[0].find(';') != -1) and + # It's ok to have many commands in a switch case that fits in 1 line + not ((cleansed_line.find('case ') != -1 or + cleansed_line.find('default:') != -1) and + cleansed_line.find('break;') != -1)): + error(filename, linenum, 'whitespace/newline', 0, + 'More than one command on the same line') + + # Some more style checks + CheckBraces(filename, clean_lines, linenum, error) + CheckTrailingSemicolon(filename, clean_lines, linenum, error) + CheckEmptyBlockBody(filename, clean_lines, linenum, error) + CheckSpacing(filename, clean_lines, linenum, nesting_state, error) + CheckOperatorSpacing(filename, clean_lines, linenum, error) + CheckParenthesisSpacing(filename, clean_lines, linenum, error) + CheckCommaSpacing(filename, clean_lines, linenum, error) + CheckBracesSpacing(filename, clean_lines, linenum, nesting_state, error) + CheckSpacingForFunctionCall(filename, clean_lines, linenum, error) + CheckCheck(filename, clean_lines, linenum, error) + CheckAltTokens(filename, clean_lines, linenum, error) + classinfo = nesting_state.InnermostClass() + if classinfo: + CheckSectionSpacing(filename, clean_lines, classinfo, linenum, error) - fileinfo_h = FileInfo(filename_h) - if not IsHeaderExtension(fileinfo_h.Extension().lstrip(".")): - return (False, "") - filename_cc = filename_cc[: -(len(fileinfo_cc.Extension()))] - matched_test_suffix = Search(_TEST_FILE_SUFFIX, fileinfo_cc.BaseName()) - if matched_test_suffix: - filename_cc = filename_cc[: -len(matched_test_suffix.group(1))] +_RE_PATTERN_INCLUDE = re.compile(r'^\s*#\s*include\s*([<"])([^>"]*)[>"].*$') +# Matches the first component of a filename delimited by -s and _s. 
That is: +# _RE_FIRST_COMPONENT.match('foo').group(0) == 'foo' +# _RE_FIRST_COMPONENT.match('foo.cc').group(0) == 'foo' +# _RE_FIRST_COMPONENT.match('foo-bar_baz.cc').group(0) == 'foo' +# _RE_FIRST_COMPONENT.match('foo_bar-baz.cc').group(0) == 'foo' +_RE_FIRST_COMPONENT = re.compile(r'^[^-_.]+') - filename_cc = filename_cc.replace("/public/", "/") - filename_cc = filename_cc.replace("/internal/", "/") - filename_h = filename_h[: -(len(fileinfo_h.Extension()))] - if filename_h.endswith("-inl"): - filename_h = filename_h[: -len("-inl")] - filename_h = filename_h.replace("/public/", "/") - filename_h = filename_h.replace("/internal/", "/") +def _DropCommonSuffixes(filename): + """Drops common suffixes like _test.cc or -inl.h from filename. + + For example: + >>> _DropCommonSuffixes('foo/foo-inl.h') + 'foo/foo' + >>> _DropCommonSuffixes('foo/bar/foo.cc') + 'foo/bar/foo' + >>> _DropCommonSuffixes('foo/foo_internal.h') + 'foo/foo' + >>> _DropCommonSuffixes('foo/foo_unusualinternal.h') + 'foo/foo_unusualinternal' + + Args: + filename: The input filename. + + Returns: + The filename with the common suffix removed. + """ + for suffix in itertools.chain( + (f"{test_suffix.lstrip('_')}.{ext}" + for test_suffix, ext in itertools.product(_test_suffixes, GetNonHeaderExtensions())), + (f'{suffix}.{ext}' + for suffix, ext in itertools.product(['inl', 'imp', 'internal'], GetHeaderExtensions()))): + if (filename.endswith(suffix) and len(filename) > len(suffix) and + filename[-len(suffix) - 1] in ('-', '_')): + return filename[:-len(suffix) - 1] + return os.path.splitext(filename)[0] + + +def _ClassifyInclude(fileinfo, include, used_angle_brackets, include_order="default"): + """Figures out what kind of header 'include' is. + + Args: + fileinfo: The current file cpplint is running over. A FileInfo instance. + include: The path to a #included file. + used_angle_brackets: True if the #include used <> rather than "". 
+ include_order: "default" or other value allowed in program arguments + + Returns: + One of the _XXX_HEADER constants. + + For example: + >>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'stdio.h', True) + _C_SYS_HEADER + >>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'string', True) + _CPP_SYS_HEADER + >>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'foo/foo.h', True, "standardcfirst") + _OTHER_SYS_HEADER + >>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'foo/foo.h', False) + _LIKELY_MY_HEADER + >>> _ClassifyInclude(FileInfo('foo/foo_unknown_extension.cc'), + ... 'bar/foo_other_ext.h', False) + _POSSIBLE_MY_HEADER + >>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'foo/bar.h', False) + _OTHER_HEADER + """ + # This is a list of all standard c++ header files, except + # those already checked for above. + is_cpp_header = include in _CPP_HEADERS + + # Mark include as C header if in list or in a known folder for standard-ish C headers. + is_std_c_header = (include_order == "default") or (include in _C_HEADERS + # additional linux glibc header folders + or re.search(rf'(?:{"|".join(C_STANDARD_HEADER_FOLDERS)})\/.*\.h', include)) + + # Headers with C++ extensions shouldn't be considered C system headers + include_ext = os.path.splitext(include)[1] + is_system = used_angle_brackets and include_ext not in ['.hh', '.hpp', '.hxx', '.h++'] + + if is_system: + if is_cpp_header: + return _CPP_SYS_HEADER + if is_std_c_header: + return _C_SYS_HEADER + else: + return _OTHER_SYS_HEADER + + # If the target file and the include we're checking share a + # basename when we drop common extensions, and the include + # lives in . , then it's likely to be owned by the target file. 
+ target_dir, target_base = ( + os.path.split(_DropCommonSuffixes(fileinfo.RepositoryName()))) + include_dir, include_base = os.path.split(_DropCommonSuffixes(include)) + target_dir_pub = os.path.normpath(target_dir + '/../public') + target_dir_pub = target_dir_pub.replace('\\', '/') + if target_base == include_base and ( + include_dir == target_dir or + include_dir == target_dir_pub): + return _LIKELY_MY_HEADER + + # If the target and include share some initial basename + # component, it's possible the target is implementing the + # include, so it's allowed to be first, but we'll never + # complain if it's not there. + target_first_component = _RE_FIRST_COMPONENT.match(target_base) + include_first_component = _RE_FIRST_COMPONENT.match(include_base) + if (target_first_component and include_first_component and + target_first_component.group(0) == + include_first_component.group(0)): + return _POSSIBLE_MY_HEADER + + return _OTHER_HEADER - files_belong_to_same_module = filename_cc.endswith(filename_h) - common_path = "" - if files_belong_to_same_module: - common_path = filename_cc[: -len(filename_h)] - return files_belong_to_same_module, common_path -def UpdateIncludeState(filename, include_dict, io=codecs): - """Fill up the include_dict with new includes found from the file. +def CheckIncludeLine(filename, clean_lines, linenum, include_state, error): + """Check rules that are applicable to #include lines. + + Strings on #include lines are NOT removed from elided line, to make + certain tasks easier. However, to prevent false positives, checks + applicable to #include lines in CheckLanguage must be put here. + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + include_state: An _IncludeState instance in which the headers are inserted. + error: The function to call with any errors found. 
+ """ + fileinfo = FileInfo(filename) + line = clean_lines.lines[linenum] + + # "include" should use the new style "foo/bar.h" instead of just "bar.h" + # Only do this check if the included header follows google naming + # conventions. If not, assume that it's a 3rd party API that + # requires special include conventions. + # + # We also make an exception for Lua headers, which follow google + # naming convention but not the include convention. + # JMM: Disabling. We do not maintain this convention in partthenon + match = re.match(r'#include\s*"([^/]+\.(.*))"', line) + if False and match: + if (IsHeaderExtension(match.group(2)) and + not _THIRD_PARTY_HEADERS_PATTERN.match(match.group(1))): + error(filename, linenum, 'build/include_subdir', 4, + 'Include the directory when naming header files') + + # we shouldn't include a file more than once. actually, there are a + # handful of instances where doing so is okay, but in general it's + # not. + match = _RE_PATTERN_INCLUDE.search(line) + if match: + include = match.group(2) + used_angle_brackets = match.group(1) == '<' + duplicate_line = include_state.FindHeader(include) + if duplicate_line >= 0: + error(filename, linenum, 'build/include', 4, + f'"{include}" already included at {filename}:{duplicate_line}') + return + + for extension in GetNonHeaderExtensions(): + if (include.endswith('.' + extension) and + os.path.dirname(fileinfo.RepositoryName()) != os.path.dirname(include)): + error(filename, linenum, 'build/include', 4, + 'Do not include .' + extension + ' files from other packages') + return - Args: - filename: the name of the header to read. - include_dict: a dictionary in which the headers are inserted. - io: The io factory to use to read the file. Provided for testability. + # We DO want to include a 3rd party looking header if it matches the + # filename. Otherwise we get an erroneous error "...should include its + # header" error later. 
+ third_src_header = False + for ext in GetHeaderExtensions(): + basefilename = filename[0:len(filename) - len(fileinfo.Extension())] + headerfile = basefilename + '.' + ext + headername = FileInfo(headerfile).RepositoryName() + if headername in include or include in headername: + third_src_header = True + break + + if third_src_header or not _THIRD_PARTY_HEADERS_PATTERN.match(include): + include_state.include_list[-1].append((include, linenum)) + + # We want to ensure that headers appear in the right order: + # 1) for foo.cc, foo.h (preferred location) + # 2) c system files + # 3) cpp system files + # 4) for foo.cc, foo.h (deprecated location) + # 5) other google headers + # + # We classify each include statement as one of those 5 types + # using a number of techniques. The include_state object keeps + # track of the highest type seen, and complains if we see a + # lower type after that. + error_message = include_state.CheckNextIncludeOrder( + _ClassifyInclude(fileinfo, include, used_angle_brackets, _include_order)) + if error_message: + error(filename, linenum, 'build/include_order', 4, + f'{error_message}. Should be: {fileinfo.BaseName()}.h, c system,' + ' c++ system, other.') + canonical_include = include_state.CanonicalizeAlphabeticalOrder(include) + if not include_state.IsInAlphabeticalOrder( + clean_lines, linenum, canonical_include): + error(filename, linenum, 'build/include_alpha', 4, + f'Include "{include}" not in alphabetical order') + include_state.SetLastHeader(canonical_include) - Returns: - True if a header was successfully added. False otherwise. 
- """ - headerfile = None - try: - headerfile = io.open(filename, "r", "utf8", "replace") - except IOError: - return False - linenum = 0 - for line in headerfile: - linenum += 1 - clean_line = CleanseComments(line) - match = _RE_PATTERN_INCLUDE.search(clean_line) - if match: - include = match.group(2) - include_dict.setdefault(include, linenum) - return True -def CheckForIncludeWhatYouUse(filename, clean_lines, include_state, error, io=codecs): - """Reports for missing stl includes. +def _GetTextInside(text, start_pattern): + r"""Retrieves all the text between matching open and close parentheses. + + Given a string of lines and a regular expression string, retrieve all the text + following the expression and between opening punctuation symbols like + (, [, or {, and the matching close-punctuation symbol. This properly nested + occurrences of the punctuations, so for the text like + printf(a(), b(c())); + a call to _GetTextInside(text, r'printf\(') will return 'a(), b(c())'. + start_pattern must match string having an open punctuation symbol at the end. + + Args: + text: The lines to extract text. Its comments and strings must be elided. + It can be single line and can span multiple lines. + start_pattern: The regexp string indicating where to start extracting + the text. + Returns: + The extracted text. + None if either the opening string or ending punctuation could not be found. + """ + # TODO(unknown): Audit cpplint.py to see what places could be profitably + # rewritten to use _GetTextInside (and use inferior regexp matching today). + + # Give opening punctuations to get the matching close-punctuations. + matching_punctuation = {'(': ')', '{': '}', '[': ']'} + closing_punctuation = set(dict.values(matching_punctuation)) + + # Find the position to start extracting text. + match = re.search(start_pattern, text, re.M) + if not match: # start_pattern not found in text. 
+ return None + start_position = match.end(0) + + assert start_position > 0, ( + 'start_pattern must ends with an opening punctuation.') + assert text[start_position - 1] in matching_punctuation, ( + 'start_pattern must ends with an opening punctuation.') + # Stack of closing punctuations we expect to have in text after position. + punctuation_stack = [matching_punctuation[text[start_position - 1]]] + position = start_position + while punctuation_stack and position < len(text): + if text[position] == punctuation_stack[-1]: + punctuation_stack.pop() + elif text[position] in closing_punctuation: + # A closing punctuation without matching opening punctuations. + return None + elif text[position] in matching_punctuation: + punctuation_stack.append(matching_punctuation[text[position]]) + position += 1 + if punctuation_stack: + # Opening punctuations left without matching close-punctuations. + return None + # punctuations match. + return text[start_position:position - 1] - This function will output warnings to make sure you are including the headers - necessary for the stl containers and functions that you use. We only give one - reason to include a header. For example, if you use both equal_to<> and - less<> in a .h file, only one (the latter in the file) of these will be - reported as a reason to include the . - Args: - filename: The name of the current file. - clean_lines: A CleansedLines instance containing the file. - include_state: An _IncludeState instance. - error: The function to call with any errors found. - io: The IO factory to use to read the header file. Provided for unittest - injection. - """ - required = {} # A map of header name to linenumber and the template entity. - # Example of required: { '': (1219, 'less<>') } +# Patterns for matching call-by-reference parameters. 
+# +# Supports nested templates up to 2 levels deep using this messy pattern: +# < (?: < (?: < [^<>]* +# > +# | [^<>] )* +# > +# | [^<>] )* +# > +_RE_PATTERN_IDENT = r'[_a-zA-Z]\w*' # =~ [[:alpha:]][[:alnum:]]* +_RE_PATTERN_TYPE = ( + r'(?:const\s+)?(?:typename\s+|class\s+|struct\s+|union\s+|enum\s+)?' + r'(?:\w|' + r'\s*<(?:<(?:<[^<>]*>|[^<>])*>|[^<>])*>|' + r'::)+') +# A call-by-reference parameter ends with '& identifier'. +_RE_PATTERN_REF_PARAM = re.compile( + r'(' + _RE_PATTERN_TYPE + r'(?:\s*(?:\bconst\b|[*]))*\s*' + r'&\s*' + _RE_PATTERN_IDENT + r')\s*(?:=[^,()]+)?[,)]') +# A call-by-const-reference parameter either ends with 'const& identifier' +# or looks like 'const type& identifier' when 'type' is atomic. +_RE_PATTERN_CONST_REF_PARAM = ( + r'(?:.*\s*\bconst\s*&\s*' + _RE_PATTERN_IDENT + + r'|const\s+' + _RE_PATTERN_TYPE + r'\s*&\s*' + _RE_PATTERN_IDENT + r')') +# Stream types. +_RE_PATTERN_REF_STREAM_PARAM = ( + r'(?:.*stream\s*&\s*' + _RE_PATTERN_IDENT + r')') + + +def CheckLanguage(filename, clean_lines, linenum, file_extension, + include_state, nesting_state, error): + """Checks rules from the 'C++ language rules' section of cppguide.html. + + Some of these rules are hard to test (function overloading, using + uint32_t inappropriately), but we do the best we can. + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + file_extension: The extension (without the dot) of the filename. + include_state: An _IncludeState instance in which the headers are inserted. + nesting_state: A NestingState instance which maintains information about + the current stack of nested blocks being parsed. + error: The function to call with any errors found. + """ + # If the line is empty or consists of entirely a comment, no need to + # check it. 
+ line = clean_lines.elided[linenum] + if not line: + return + + match = _RE_PATTERN_INCLUDE.search(line) + if match: + CheckIncludeLine(filename, clean_lines, linenum, include_state, error) + return + + # Reset include state across preprocessor directives. This is meant + # to silence warnings for conditional includes. + match = re.match(r'^\s*#\s*(if|ifdef|ifndef|elif|else|endif)\b', line) + if match: + include_state.ResetSection(match.group(1)) + + + # Perform other checks now that we are sure that this is not an include line + CheckCasts(filename, clean_lines, linenum, error) + CheckGlobalStatic(filename, clean_lines, linenum, error) + CheckPrintf(filename, clean_lines, linenum, error) + + if IsHeaderExtension(file_extension): + # TODO(unknown): check that 1-arg constructors are explicit. + # How to tell it's a constructor? + # (handled in CheckForNonStandardConstructs for now) + # TODO(unknown): check that classes declare or disable copy/assign + # (level 1 error) + pass - for linenum in xrange(clean_lines.NumLines()): - line = clean_lines.elided[linenum] - if not line or line[0] == "#": - continue + # Check if people are using the verboten C basic types. The only exception + # we regularly allow is "unsigned short port" for port. + if re.search(r'\bshort port\b', line): + if not re.search(r'\bunsigned short port\b', line): + error(filename, linenum, 'runtime/int', 4, + 'Use "unsigned short" for ports, not "short"') + else: + match = re.search(r'\b(short|long(?! 
+double)|long long)\b', line) + if match: + error(filename, linenum, 'runtime/int', 4, + f'Use int16_t/int64_t/etc, rather than the C type {match.group(1)}') + + # Check if some verboten operator overloading is going on + # TODO(unknown): catch out-of-line unary operator&: + # class X {}; + # int operator&(const X& x) { return 42; } // unary operator& + # The trick is it's hard to tell apart from binary operator&: + # class Y { int operator&(const Y& x) { return 23; } }; // binary operator& + if re.search(r'\boperator\s*&\s*\(\s*\)', line): + error(filename, linenum, 'runtime/operator', 4, + 'Unary operator& is dangerous. Do not use it.') + + # Check for suspicious usage of "if" like + # } if (a == b) { + if re.search(r'\}\s*if\s*\(', line): + error(filename, linenum, 'readability/braces', 4, + 'Did you mean "else if"? If not, start a new line for "if".') + + # Check for potential format string bugs like printf(foo). + # We constrain the pattern not to pick things like DocidForPrintf(foo). + # Not perfect but it can catch printf(foo.c_str()) and printf(foo->c_str()) + # TODO(unknown): Catch the following case. Need to change the calling + # convention of the whole function to process multiple line to handle it. + # printf( + # boy_this_is_a_really_long_variable_that_cannot_fit_on_the_prev_line); + printf_args = _GetTextInside(line, r'(?i)\b(string)?printf\s*\(') + if printf_args: + match = re.match(r'([\w.\->()]+)$', printf_args) + if match and match.group(1) != '__VA_ARGS__': + function_name = re.search(r'\b((?:string)?printf)\s*\(', + line, re.I).group(1) + error(filename, linenum, 'runtime/printf', 4, + 'Potential format string bug. Do' + f' {function_name}("%s", {match.group(1)}) instead.') + + # Check for potential memset bugs like memset(buf, sizeof(buf), 0). 
+ match = re.search(r'memset\s*\(([^,]*),\s*([^,]*),\s*0\s*\)', line) + if match and not re.match(r"^''|-?[0-9]+|0x[0-9A-Fa-f]$", match.group(2)): + error(filename, linenum, 'runtime/memset', 4, + f'Did you mean "memset({match.group(1)}, 0, {match.group(2)})"?') + + if re.search(r'\busing namespace\b', line): + if re.search(r'\bliterals\b', line): + error(filename, linenum, 'build/namespaces_literals', 5, + 'Do not use namespace using-directives. ' + 'Use using-declarations instead.') + else: + error(filename, linenum, 'build/namespaces', 5, + 'Do not use namespace using-directives. ' + 'Use using-declarations instead.') + + # Detect variable-length arrays. + match = re.match(r'\s*(.+::)?(\w+) [a-z]\w*\[(.+)];', line) + if (match and match.group(2) != 'return' and match.group(2) != 'delete' and + match.group(3).find(']') == -1): + # Split the size using space and arithmetic operators as delimiters. + # If any of the resulting tokens are not compile time constants then + # report the error. + tokens = re.split(r'\s|\+|\-|\*|\/|<<|>>]', match.group(3)) + is_const = True + skip_next = False + for tok in tokens: + if skip_next: + skip_next = False + continue + + if re.search(r'sizeof\(.+\)', tok): continue + if re.search(r'arraysize\(\w+\)', tok): continue + + tok = tok.lstrip('(') + tok = tok.rstrip(')') + if not tok: continue + if re.match(r'\d+', tok): continue + if re.match(r'0[xX][0-9a-fA-F]+', tok): continue + if re.match(r'k[A-Z0-9]\w*', tok): continue + if re.match(r'(.+::)?k[A-Z0-9]\w*', tok): continue + if re.match(r'(.+::)?[A-Z][A-Z0-9_]*', tok): continue + # A catch all for tricky sizeof cases, including 'sizeof expression', + # 'sizeof(*type)', 'sizeof(const type)', 'sizeof(struct StructName)' + # requires skipping the next token because we split on ' ' and '*'. + if tok.startswith('sizeof'): + skip_next = True + continue + is_const = False + break + if not is_const: + error(filename, linenum, 'runtime/arrays', 1, + 'Do not use variable-length arrays. 
Use an appropriately named ' + "('k' followed by CamelCase) compile-time constant for the size.") + + # Check for use of unnamed namespaces in header files. Registration + # macros are typically OK, so we allow use of "namespace {" on lines + # that end with backslashes. + if (IsHeaderExtension(file_extension) + and re.search(r'\bnamespace\s*{', line) + and line[-1] != '\\'): + error(filename, linenum, 'build/namespaces_headers', 4, + 'Do not use unnamed namespaces in header files. See ' + 'https://google-styleguide.googlecode.com/svn/trunk/cppguide.xml#Namespaces' + ' for more information.') - # String is special -- it is a non-templatized type in STL. - matched = _RE_PATTERN_STRING.search(line) - if matched: - # Don't warn about strings in non-STL namespaces: - # (We check only the first match per line; good enough.) - prefix = line[: matched.start()] - if prefix.endswith("std::") or not prefix.endswith("::"): - required[""] = (linenum, "string") - - for pattern, template, header in _re_pattern_headers_maybe_templates: - if pattern.search(line): - required[header] = (linenum, template) - - # The following function is just a speed up, no semantics are changed. - if not "<" in line: # Reduces the cpu time usage by skipping lines. - continue - for pattern, template, header in _re_pattern_templates: - matched = pattern.search(line) - if matched: - # Don't warn about IWYU in non-STL namespaces: - # (We check only the first match per line; good enough.) - prefix = line[: matched.start()] - if prefix.endswith("std::") or not prefix.endswith("::"): - required[header] = (linenum, template) - - # The policy is that if you #include something in foo.h you don't need to - # include it again in foo.cc. Here, we will look at possible includes. - # Let's flatten the include_state include_list and copy it into a dictionary. 
- include_dict = dict( - [item for sublist in include_state.include_list for item in sublist] - ) +def CheckGlobalStatic(filename, clean_lines, linenum, error): + """Check for unsafe global or static objects. + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + error: The function to call with any errors found. + """ + line = clean_lines.elided[linenum] + + # Match two lines at a time to support multiline declarations + if linenum + 1 < clean_lines.NumLines() and not re.search(r'[;({]', line): + line += clean_lines.elided[linenum + 1].strip() + + # Check for people declaring static/global STL strings at the top level. + # This is dangerous because the C++ language does not guarantee that + # globals with constructors are initialized before the first access, and + # also because globals can be destroyed when some threads are still running. + # TODO(unknown): Generalize this to also find static unique_ptr instances. + # TODO(unknown): File bugs for clang-tidy to find these. + match = re.match( + r'((?:|static +)(?:|const +))(?::*std::)?string( +const)? +' + r'([a-zA-Z0-9_:]+)\b(.*)', + line) + + # Remove false positives: + # - String pointers (as opposed to values). + # string *pointer + # const string *pointer + # string const *pointer + # string *const pointer + # + # - Functions and template specializations. + # string Function(... + # string Class::Method(... + # + # - Operators. These are matched separately because operator names + # cross non-word boundaries, and trying to match both operators + # and functions at the same time would decrease accuracy of + # matching identifiers. 
+ # string Class::operator*() + if (match and + not re.search(r'\bstring\b(\s+const)?\s*[\*\&]\s*(const\s+)?\w', line) and + not re.search(r'\boperator\W', line) and + not re.match(r'\s*(<.*>)?(::[a-zA-Z0-9_]+)*\s*\(([^"]|$)', match.group(4))): + if re.search(r'\bconst\b', line): + error(filename, linenum, 'runtime/string', 4, + 'For a static/global string constant, use a C style string instead:' + f' "{match.group(1)}char{match.group(2) or ""} {match.group(3)}[]".') + else: + error(filename, linenum, 'runtime/string', 4, + 'Static/global string variables are not permitted.') - # Did we find the header for this file (if any) and successfully load it? - header_found = False - - # Use the absolute path so that matching works properly. - abs_filename = FileInfo(filename).FullName() - - # For Emacs's flymake. - # If cpplint is invoked from Emacs's flymake, a temporary file is generated - # by flymake and that file name might end with '_flymake.cc'. In that case, - # restore original file name here so that the corresponding header file can be - # found. - # e.g. If the file name is 'foo_flymake.cc', we should search for 'foo.h' - # instead of 'foo_flymake.h' - abs_filename = re.sub(r"_flymake\.cc$", ".cc", abs_filename) - - # include_dict is modified during iteration, so we iterate over a copy of - # the keys. - header_keys = list(include_dict.keys()) - for header in header_keys: - (same_module, common_path) = FilesBelongToSameModule(abs_filename, header) - fullpath = common_path + header - if same_module and UpdateIncludeState(fullpath, include_dict, io): - header_found = True - - # If we can't find the header file for a .cc, assume it's because we don't - # know where to look. In that case we'll give up as we're not sure they - # didn't include it in the .h file. - # TODO(unknown): Do a better job of finding .h files so we are confident that - # not having the .h file means there isn't one. 
- if not header_found: - for extension in GetNonHeaderExtensions(): - if filename.endswith("." + extension): - return - - # All the lines have been processed, report the errors found. - for required_header_unstripped in sorted(required, key=required.__getitem__): - template = required[required_header_unstripped][1] - if required_header_unstripped.strip('<>"') not in include_dict: - error( - filename, - required[required_header_unstripped][0], - "build/include_what_you_use", - 4, - "Add #include " + required_header_unstripped + " for " + template, - ) - - -_RE_PATTERN_EXPLICIT_MAKEPAIR = re.compile(r"\bmake_pair\s*<") + if (re.search(r'\b([A-Za-z0-9_]*_)\(\1\)', line) or + re.search(r'\b([A-Za-z0-9_]*_)\(CHECK_NOTNULL\(\1\)\)', line)): + error(filename, linenum, 'runtime/init', 4, + 'You seem to be initializing a member variable with itself.') -def CheckMakePairUsesDeduction(filename, clean_lines, linenum, error): - """Check that make_pair's template arguments are deduced. +def CheckPrintf(filename, clean_lines, linenum, error): + """Check for printf related issues. + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + error: The function to call with any errors found. + """ + line = clean_lines.elided[linenum] + + # When snprintf is used, the second argument shouldn't be a literal. + match = re.search(r'snprintf\s*\(([^,]*),\s*([0-9]*)\s*,', line) + if match and match.group(2) != '0': + # If 2nd arg is zero, snprintf is used to calculate size. + error(filename, linenum, 'runtime/printf', 3, 'If you can, use' + f' sizeof({match.group(1)}) instead of {match.group(2)}' + ' as the 2nd arg to snprintf.') + + # Check if some verboten C functions are being used. + if re.search(r'\bsprintf\s*\(', line): + error(filename, linenum, 'runtime/printf', 5, + 'Never use sprintf. 
Use snprintf instead.') + match = re.search(r'\b(strcpy|strcat)\s*\(', line) + if match: + error(filename, linenum, 'runtime/printf', 4, + f'Almost always, snprintf is better than {match.group(1)}') - G++ 4.6 in C++11 mode fails badly if make_pair's template arguments are - specified explicitly, and such use isn't intended in any case. - Args: - filename: The name of the current file. - clean_lines: A CleansedLines instance containing the file. - linenum: The number of the line to check. - error: The function to call with any errors found. - """ - line = clean_lines.elided[linenum] - match = _RE_PATTERN_EXPLICIT_MAKEPAIR.search(line) +def IsDerivedFunction(clean_lines, linenum): + """Check if current line contains an inherited function. + + Args: + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + Returns: + True if current line contains a function with "override" + virt-specifier. + """ + # Scan back a few lines for start of current function + for i in range(linenum, max(-1, linenum - 10), -1): + match = re.match(r'^([^()]*\w+)\(', clean_lines.elided[i]) if match: - error( - filename, - linenum, - "build/explicit_make_pair", - 4, # 4 = high confidence - "For C++11-compatibility, omit template arguments from make_pair" - " OR use pair directly OR if appropriate, construct a pair directly", - ) + # Look for "override" after the matching closing parenthesis + line, _, closing_paren = CloseExpression( + clean_lines, i, len(match.group(1))) + return (closing_paren >= 0 and + re.search(r'\boverride\b', line[closing_paren:])) + return False -def CheckRedundantVirtual(filename, clean_lines, linenum, error): - """Check if line contains a redundant "virtual" function-specifier. +def IsOutOfLineMethodDefinition(clean_lines, linenum): + """Check if current line contains an out-of-line method definition. - Args: - filename: The name of the current file. - clean_lines: A CleansedLines instance containing the file. 
- linenum: The number of the line to check. - error: The function to call with any errors found. - """ - # Look for "virtual" on current line. - line = clean_lines.elided[linenum] - virtual = Match(r"^(.*)(\bvirtual\b)(.*)$", line) - if not virtual: - return + Args: + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + Returns: + True if current line contains an out-of-line method definition. + """ + # Scan back a few lines for start of current function + for i in range(linenum, max(-1, linenum - 10), -1): + if re.match(r'^([^()]*\w+)\(', clean_lines.elided[i]): + return re.match(r'^[^()]*\w+::\w+\(', clean_lines.elided[i]) is not None + return False - # Ignore "virtual" keywords that are near access-specifiers. These - # are only used in class base-specifier and do not apply to member - # functions. - if Search(r"\b(public|protected|private)\s+$", virtual.group(1)) or Match( - r"^\s+(public|protected|private)\b", virtual.group(3) - ): - return - # Ignore the "virtual" keyword from virtual base classes. Usually - # there is a column on the same line in these cases (virtual base - # classes are rare in google3 because multiple inheritance is rare). - if Match(r"^.*[^:]:[^:].*$", line): +def IsInitializerList(clean_lines, linenum): + """Check if current line is inside constructor initializer list. + + Args: + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + Returns: + True if current line appears to be inside constructor initializer + list, False otherwise. + """ + for i in range(linenum, 1, -1): + line = clean_lines.elided[i] + if i == linenum: + remove_function_body = re.match(r'^(.*)\{\s*$', line) + if remove_function_body: + line = remove_function_body.group(1) + + if re.search(r'\s:\s*\w+[({]', line): + # A lone colon tend to indicate the start of a constructor + # initializer list. 
It could also be a ternary operator, which + # also tend to appear in constructor initializer lists as + # opposed to parameter lists. + return True + if re.search(r'\}\s*,\s*$', line): + # A closing brace followed by a comma is probably the end of a + # brace-initialized member in constructor initializer list. + return True + if re.search(r'[{};]\s*$', line): + # Found one of the following: + # - A closing brace or semicolon, probably the end of the previous + # function. + # - An opening brace, probably the start of current class or namespace. + # + # Current line is probably not inside an initializer list since + # we saw one of those things without seeing the starting colon. + return False + + # Got to the beginning of the file without seeing the start of + # constructor initializer list. + return False + + +def CheckForNonConstReference(filename, clean_lines, linenum, + nesting_state, error): + """Check for non-const references. + + Separate from CheckLanguage since it scans backwards from current + line, instead of scanning forward. + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + nesting_state: A NestingState instance which maintains information about + the current stack of nested blocks being parsed. + error: The function to call with any errors found. + """ + # Do nothing if there is no '&' on current line. + line = clean_lines.elided[linenum] + if '&' not in line: + return + + # If a function is inherited, current function doesn't have much of + # a choice, so any non-const references should not be blamed on + # derived function. + if IsDerivedFunction(clean_lines, linenum): + return + + # Don't warn on out-of-line method definitions, as we would warn on the + # in-line declaration, if it isn't marked with 'override'. 
+ if IsOutOfLineMethodDefinition(clean_lines, linenum): + return + + # Long type names may be broken across multiple lines, usually in one + # of these forms: + # LongType + # ::LongTypeContinued &identifier + # LongType:: + # LongTypeContinued &identifier + # LongType< + # ...>::LongTypeContinued &identifier + # + # If we detected a type split across two lines, join the previous + # line to current line so that we can match const references + # accordingly. + # + # Note that this only scans back one line, since scanning back + # arbitrary number of lines would be expensive. If you have a type + # that spans more than 2 lines, please use a typedef. + if linenum > 1: + previous = None + if re.match(r'\s*::(?:[\w<>]|::)+\s*&\s*\S', line): + # previous_line\n + ::current_line + previous = re.search(r'\b((?:const\s*)?(?:[\w<>]|::)+[\w<>])\s*$', + clean_lines.elided[linenum - 1]) + elif re.match(r'\s*[a-zA-Z_]([\w<>]|::)+\s*&\s*\S', line): + # previous_line::\n + current_line + previous = re.search(r'\b((?:const\s*)?(?:[\w<>]|::)+::)\s*$', + clean_lines.elided[linenum - 1]) + if previous: + line = previous.group(1) + line.lstrip() + else: + # Check for templated parameter that is split across multiple lines + endpos = line.rfind('>') + if endpos > -1: + (_, startline, startpos) = ReverseCloseExpression( + clean_lines, linenum, endpos) + if startpos > -1 and startline < linenum: + # Found the matching < on an earlier line, collect all + # pieces up to current line. + line = '' + for i in range(startline, linenum + 1): + line += clean_lines.elided[i].strip() + + # Check for non-const references in function parameters. 
A single '&' may + # found in the following places: + # inside expression: binary & for bitwise AND + # inside expression: unary & for taking the address of something + # inside declarators: reference parameter + # We will exclude the first two cases by checking that we are not inside a + # function body, including one that was just introduced by a trailing '{'. + # TODO(unknown): Doesn't account for 'catch(Exception& e)' [rare]. + if (nesting_state.previous_stack_top and + not (isinstance(nesting_state.previous_stack_top, _ClassInfo) or + isinstance(nesting_state.previous_stack_top, _NamespaceInfo))): + # Not at toplevel, not within a class, and not within a namespace + return + + # Avoid initializer lists. We only need to scan back from the + # current line for something that starts with ':'. + # + # We don't need to check the current line, since the '&' would + # appear inside the second set of parentheses on the current line as + # opposed to the first set. + if linenum > 0: + for i in range(linenum - 1, max(0, linenum - 10), -1): + previous_line = clean_lines.elided[i] + if not re.search(r'[),]\s*$', previous_line): + break + if re.match(r'^\s*:\s+\S', previous_line): return - # Look for the next opening parenthesis. This is the start of the - # parameter list (possibly on the next line shortly after virtual). - # TODO(unknown): doesn't work if there are virtual functions with - # decltype() or other things that use parentheses, but csearch suggests - # that this is rare. 
- end_col = -1 - end_line = -1 - start_col = len(virtual.group(2)) - for start_line in xrange(linenum, min(linenum + 3, clean_lines.NumLines())): - line = clean_lines.elided[start_line][start_col:] - parameter_list = Match(r"^([^(]*)\(", line) - if parameter_list: - # Match parentheses to find the end of the parameter list - (_, end_line, end_col) = CloseExpression( - clean_lines, start_line, start_col + len(parameter_list.group(1)) - ) - break - start_col = 0 - - if end_col < 0: - return # Couldn't find end of parameter list, give up - - # Look for "override" or "final" after the parameter list - # (possibly on the next few lines). - for i in xrange(end_line, min(end_line + 3, clean_lines.NumLines())): - line = clean_lines.elided[i][end_col:] - match = Search(r"\b(override|final)\b", line) - if match: - error( - filename, - linenum, - "readability/inheritance", - 4, - ( - '"virtual" is redundant since function is ' - 'already declared as "%s"' % match.group(1) - ), - ) - - # Set end_col to check whole lines after we are done with the - # first line. - end_col = 0 - if Search(r"[^\w]\s*$", line): - break + # Avoid preprocessors + if re.search(r'\\\s*$', line): + return + + # Avoid constructor initializer lists + if IsInitializerList(clean_lines, linenum): + return + + # We allow non-const references in a few standard places, like functions + # called "swap()" or iostream operators like "<<" or ">>". Do not check + # those function parameters. + # + # We also accept & in static_assert, which looks like a function but + # it's actually a declaration expression. + allowed_functions = (r'(?:[sS]wap(?:<\w:+>)?|' + r'operator\s*[<>][<>]|' + r'static_assert|COMPILE_ASSERT' + r')\s*\(') + if re.search(allowed_functions, line): + return + elif not re.search(r'\S+\([^)]*$', line): + # Don't see an allowed function on this line. Actually we + # didn't see any function name on this line, so this is likely a + # multi-line parameter list. Try a bit harder to catch this case. 
+ for i in range(2): + if (linenum > i and + re.search(allowed_functions, clean_lines.elided[linenum - i - 1])): + return + decls = re.sub(r'{[^}]*}', ' ', line) # exclude function body + for parameter in re.findall(_RE_PATTERN_REF_PARAM, decls): + if (not re.match(_RE_PATTERN_CONST_REF_PARAM, parameter) and + not re.match(_RE_PATTERN_REF_STREAM_PARAM, parameter)): + error(filename, linenum, 'runtime/references', 2, + 'Is this a non-const reference? ' + 'If so, make const or use a pointer: ' + + re.sub(' *<', '<', parameter)) -def CheckRedundantOverrideOrFinal(filename, clean_lines, linenum, error): - """Check if line contains a redundant "override" or "final" virt-specifier. - Args: - filename: The name of the current file. - clean_lines: A CleansedLines instance containing the file. - linenum: The number of the line to check. - error: The function to call with any errors found. - """ - # Look for closing parenthesis nearby. We need one to confirm where - # the declarator ends and where the virt-specifier starts to avoid - # false positives. - line = clean_lines.elided[linenum] - declarator_end = line.rfind(")") - if declarator_end >= 0: - fragment = line[declarator_end:] +def CheckCasts(filename, clean_lines, linenum, error): + """Various cast related checks. + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + error: The function to call with any errors found. + """ + line = clean_lines.elided[linenum] + + # Check to see if they're using an conversion function cast. + # I just try to capture the most common basic types, though there are more. + # Parameterless conversion functions, such as bool(), are allowed as they are + # probably a member operator declaration or default constructor. 
+ match = re.search( + r'(\bnew\s+(?:const\s+)?|\S<\s*(?:const\s+)?)?\b' + r'(int|float|double|bool|char|int16_t|uint16_t|int32_t|uint32_t|int64_t|uint64_t)' + r'(\([^)].*)', line) + expecting_function = ExpectingFunctionArgs(clean_lines, linenum) + if match and not expecting_function: + matched_type = match.group(2) + + # matched_new_or_template is used to silence two false positives: + # - New operators + # - Template arguments with function types + # + # For template arguments, we match on types immediately following + # an opening bracket without any spaces. This is a fast way to + # silence the common case where the function type is the first + # template argument. False negative with less-than comparison is + # avoided because those operators are usually followed by a space. + # + # function // bracket + no space = false positive + # value < double(42) // bracket + space = true positive + matched_new_or_template = match.group(1) + + # Avoid arrays by looking for brackets that come after the closing + # parenthesis. + if re.match(r'\([^()]+\)\s*\[', match.group(3)): + return + + # Other things to ignore: + # - Function pointers + # - Casts to pointer types + # - Placement new + # - Alias declarations + matched_funcptr = match.group(3) + if (matched_new_or_template is None and + not (matched_funcptr and + (re.match(r'\((?:[^() ]+::\s*\*\s*)?[^() ]+\)\s*\(', + matched_funcptr) or + matched_funcptr.startswith('(*)'))) and + not re.match(r'\s*using\s+\S+\s*=\s*' + matched_type, line) and + not re.search(r'new\(\S+\)\s*' + matched_type, line)): + error(filename, linenum, 'readability/casting', 4, + 'Using deprecated casting style. ' + f'Use static_cast<{matched_type}>(...) instead') + + if not expecting_function: + CheckCStyleCast(filename, clean_lines, linenum, 'static_cast', + r'\((int|float|double|bool|char|u?int(16|32|64)_t|size_t)\)', error) + + # This doesn't catch all cases. Consider (const char * const)"hello". 
+ # + # (char *) "foo" should always be a const_cast (reinterpret_cast won't + # compile). + if CheckCStyleCast(filename, clean_lines, linenum, 'const_cast', + r'\((char\s?\*+\s?)\)\s*"', error): + pass + else: + # Check pointer casts for other than string constants + CheckCStyleCast(filename, clean_lines, linenum, 'reinterpret_cast', + r'\((\w+\s?\*+\s?)\)', error) + + # In addition, we look for people taking the address of a cast. This + # is dangerous -- casts can assign to temporaries, so the pointer doesn't + # point where you think. + # + # Some non-identifier character is required before the '&' for the + # expression to be recognized as a cast. These are casts: + # expression = &static_cast(temporary()); + # function(&(int*)(temporary())); + # + # This is not a cast: + # reference_type&(int* function_param); + match = re.search( + r'(?:[^\w]&\(([^)*][^)]*)\)[\w(])|' + r'(?:[^\w]&(static|dynamic|down|reinterpret)_cast\b)', line) + if match: + # Try a better error message when the & is bound to something + # dereferenced by the casted pointer, as opposed to the casted + # pointer itself. + parenthesis_error = False + match = re.match(r'^(.*&(?:static|dynamic|down|reinterpret)_cast\b)<', line) + if match: + _, y1, x1 = CloseExpression(clean_lines, linenum, len(match.group(1))) + if x1 >= 0 and clean_lines.elided[y1][x1] == '(': + _, y2, x2 = CloseExpression(clean_lines, y1, x1) + if x2 >= 0: + extended_line = clean_lines.elided[y2][x2:] + if y2 < clean_lines.NumLines() - 1: + extended_line += clean_lines.elided[y2 + 1] + if re.match(r'\s*(?:->|\[)', extended_line): + parenthesis_error = True + + if parenthesis_error: + error(filename, linenum, 'readability/casting', 4, + ('Are you taking an address of something dereferenced ' + 'from a cast? 
Wrapping the dereferenced expression in ' + 'parentheses will make the binding more obvious')) else: - if linenum > 1 and clean_lines.elided[linenum - 1].rfind(")") >= 0: - fragment = line - else: - return - - # Check that at most one of "override" or "final" is present, not both - if Search(r"\boverride\b", fragment) and Search(r"\bfinal\b", fragment): - error( - filename, - linenum, - "readability/inheritance", - 4, - ( - '"override" is redundant since function is ' - 'already declared as "final"' - ), - ) + error(filename, linenum, 'runtime/casting', 4, + ('Are you taking an address of a cast? ' + 'This is dangerous: could be a temp var. ' + 'Take the address before doing the cast, rather than after')) -# Returns true if we are at a new block, and it is directly -# inside of a namespace. -def IsBlockInNameSpace(nesting_state, is_forward_declaration): - """Checks that the new block is directly in a namespace. +def CheckCStyleCast(filename, clean_lines, linenum, cast_type, pattern, error): + """Checks for a C-style cast by looking for the pattern. + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + cast_type: The string for the C++ cast to recommend. This is either + reinterpret_cast, static_cast, or const_cast, depending. + pattern: The regular expression used to find C-style casts. + error: The function to call with any errors found. + + Returns: + True if an error was emitted. + False otherwise. + """ + line = clean_lines.elided[linenum] + match = re.search(pattern, line) + if not match: + return False - Args: - nesting_state: The _NestingState object that contains info about our state. - is_forward_declaration: If the class is a forward declared class. - Returns: - Whether or not the new block is directly in a namespace. 
- """ - if is_forward_declaration: - return len(nesting_state.stack) >= 1 and ( - isinstance(nesting_state.stack[-1], _NamespaceInfo) - ) - - return ( - len(nesting_state.stack) > 1 - and nesting_state.stack[-1].check_namespace_indentation - and isinstance(nesting_state.stack[-2], _NamespaceInfo) - ) + # Exclude lines with keywords that tend to look like casts + context = line[0:match.start(1) - 1] + if re.match(r'.*\b(?:sizeof|alignof|alignas|[_A-Z][_A-Z0-9]*)\s*$', context): + return False + # Try expanding current context to see if we one level of + # parentheses inside a macro. + if linenum > 0: + for i in range(linenum - 1, max(0, linenum - 5), -1): + context = clean_lines.elided[i] + context + if re.match(r'.*\b[_A-Z][_A-Z0-9]*\s*\((?:\([^()]*\)|[^()])*$', context): + return False -def ShouldCheckNamespaceIndentation( - nesting_state, is_namespace_indent_item, raw_lines_no_comments, linenum -): - """This method determines if we should apply our namespace indentation check. + # operator++(int) and operator--(int) + if (context.endswith(' operator++') or context.endswith(' operator--') or + context.endswith('::operator++') or context.endswith('::operator--')): + return False - Args: - nesting_state: The current nesting state. - is_namespace_indent_item: If we just put a new class on the stack, True. - If the top of the stack is not a class, or we did not recently - add the class, False. - raw_lines_no_comments: The lines without the comments. - linenum: The current line number we are processing. + # A single unnamed argument for a function tends to look like old style cast. + # If we see those, don't issue warnings for deprecated casts. + remainder = line[match.end(0):] + if re.match(r'^\s*(?:;|const\b|throw\b|final\b|override\b|[=>{),]|->)', + remainder): + return False - Returns: - True if we should apply our namespace indentation check. Currently, it - only works for classes and namespaces inside of a namespace. 
- """ + # At this point, all that should be left is actual casts. + error(filename, linenum, 'readability/casting', 4, + f'Using C-style cast. Use {cast_type}<{match.group(1)}>(...) instead') - is_forward_declaration = IsForwardClassDeclaration(raw_lines_no_comments, linenum) + return True - if not (is_namespace_indent_item or is_forward_declaration): - return False - # If we are in a macro, we do not want to check the namespace indentation. - if IsMacroDefinition(raw_lines_no_comments, linenum): - return False +def ExpectingFunctionArgs(clean_lines, linenum): + """Checks whether where function type arguments are expected. + + Args: + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + + Returns: + True if the line at 'linenum' is inside something that expects arguments + of function types. + """ + line = clean_lines.elided[linenum] + return (re.match(r'^\s*MOCK_(CONST_)?METHOD\d+(_T)?\(', line) or + (linenum >= 2 and + (re.match(r'^\s*MOCK_(?:CONST_)?METHOD\d+(?:_T)?\((?:\S+,)?\s*$', + clean_lines.elided[linenum - 1]) or + re.match(r'^\s*MOCK_(?:CONST_)?METHOD\d+(?:_T)?\(\s*$', + clean_lines.elided[linenum - 2]) or + re.search(r'\bstd::m?function\s*\<\s*$', + clean_lines.elided[linenum - 1])))) - return IsBlockInNameSpace(nesting_state, is_forward_declaration) +_HEADERS_CONTAINING_TEMPLATES = ( + ('', ('deque',)), + ('', ('unary_function', 'binary_function', + 'plus', 'minus', 'multiplies', 'divides', 'modulus', + 'negate', + 'equal_to', 'not_equal_to', 'greater', 'less', + 'greater_equal', 'less_equal', + 'logical_and', 'logical_or', 'logical_not', + 'unary_negate', 'not1', 'binary_negate', 'not2', + 'bind1st', 'bind2nd', + 'pointer_to_unary_function', + 'pointer_to_binary_function', + 'ptr_fun', + 'mem_fun_t', 'mem_fun', 'mem_fun1_t', 'mem_fun1_ref_t', + 'mem_fun_ref_t', + 'const_mem_fun_t', 'const_mem_fun1_t', + 'const_mem_fun_ref_t', 'const_mem_fun1_ref_t', + 'mem_fun_ref', + )), + ('', ('numeric_limits',)), 
+ ('', ('list',)), + ('', ('multimap',)), + ('', ('allocator', 'make_shared', 'make_unique', 'shared_ptr', + 'unique_ptr', 'weak_ptr')), + ('', ('queue', 'priority_queue',)), + ('', ('set', 'multiset',)), + ('', ('stack',)), + ('', ('char_traits', 'basic_string',)), + ('', ('tuple',)), + ('', ('unordered_map', 'unordered_multimap')), + ('', ('unordered_set', 'unordered_multiset')), + ('', ('pair',)), + ('', ('vector',)), -# Call this method if the line is directly inside of a namespace. -# If the line above is blank (excluding comments) or the start of -# an inner namespace, it cannot be indented. -def CheckItemIndentationInNamespace(filename, raw_lines_no_comments, linenum, error): - line = raw_lines_no_comments[linenum] - if Match(r"^\s+", line): - error( - filename, - linenum, - "runtime/indentation_namespace", - 4, - "Do not indent within a namespace", - ) - - -def ProcessLine( - filename, - file_extension, - clean_lines, - line, - include_state, - function_state, - nesting_state, - error, - extra_check_functions=None, -): - """Processes a single line in the file. + # gcc extensions. + # Note: std::hash is their hash, ::hash is our hash + ('', ('hash_map', 'hash_multimap',)), + ('', ('hash_set', 'hash_multiset',)), + ('', ('slist',)), + ) - Args: - filename: Filename of the file that is being processed. - file_extension: The extension (dot not included) of the file. - clean_lines: An array of strings, each representing a line of the file, - with comments stripped. - line: Number of line being processed. - include_state: An _IncludeState instance in which the headers are inserted. - function_state: A _FunctionState instance which counts function lines, etc. - nesting_state: A NestingState instance which maintains information about - the current stack of nested blocks being parsed. 
- error: A callable to which errors are reported, which takes 4 arguments: - filename, line number, error level, and message - extra_check_functions: An array of additional check functions that will be - run on each source line. Each function takes 4 - arguments: filename, clean_lines, line, error - """ - raw_lines = clean_lines.raw_lines - ParseNolintSuppressions(filename, raw_lines[line], line, error) - nesting_state.Update(filename, clean_lines, line, error) - CheckForNamespaceIndentation(filename, nesting_state, clean_lines, line, error) - if nesting_state.InAsmBlock(): - return - CheckForFunctionLengths(filename, clean_lines, line, function_state, error) - CheckForMultilineCommentsAndStrings(filename, clean_lines, line, error) - CheckStyle(filename, clean_lines, line, file_extension, nesting_state, error) - CheckLanguage( - filename, clean_lines, line, file_extension, include_state, nesting_state, error +_HEADERS_MAYBE_TEMPLATES = ( + ('', ('copy', 'max', 'min', 'min_element', 'sort', + 'transform', + )), + ('', ('forward', 'make_pair', 'move', 'swap')), ) - CheckForNonConstReference(filename, clean_lines, line, nesting_state, error) - CheckForNonStandardConstructs(filename, clean_lines, line, nesting_state, error) - CheckVlogArguments(filename, clean_lines, line, error) - CheckPosixThreading(filename, clean_lines, line, error) - CheckInvalidIncrement(filename, clean_lines, line, error) - CheckMakePairUsesDeduction(filename, clean_lines, line, error) - CheckRedundantVirtual(filename, clean_lines, line, error) - CheckRedundantOverrideOrFinal(filename, clean_lines, line, error) - if extra_check_functions: - for check_fn in extra_check_functions: - check_fn(filename, clean_lines, line, error) - - -def FlagCxx11Features(filename, clean_lines, linenum, error): - """Flag those c++11 features that we only allow in certain places. - Args: - filename: The name of the current file. - clean_lines: A CleansedLines instance containing the file. 
- linenum: The number of the line to check. - error: The function to call with any errors found. - """ - line = clean_lines.elided[linenum] +# Non templated types or global objects +_HEADERS_TYPES_OR_OBJS = ( + # String and others are special -- it is a non-templatized type in STL. + ('', ('string',)), + ('', ('cin', 'cout', 'cerr', 'clog', 'wcin', 'wcout', + 'wcerr', 'wclog')), + ('', ('FILE', 'fpos_t'))) + +# Non templated functions +_HEADERS_FUNCTIONS = ( + ('', ('fopen', 'freopen', + 'fclose', 'fflush', 'setbuf', 'setvbuf', 'fread', + 'fwrite', 'fgetc', 'getc', 'fgets', 'fputc', 'putc', + 'fputs', 'getchar', 'gets', 'putchar', 'puts', 'ungetc', + 'scanf', 'fscanf', 'sscanf', 'vscanf', 'vfscanf', + 'vsscanf', 'printf', 'fprintf', 'sprintf', 'snprintf', + 'vprintf', 'vfprintf', 'vsprintf', 'vsnprintf', + 'ftell', 'fgetpos', 'fseek', 'fsetpos', + 'clearerr', 'feof', 'ferror', 'perror', + 'tmpfile', 'tmpnam'),),) - include = Match(r'\s*#\s*include\s+[<"]([^<"]+)[">]', line) - - # Flag unapproved C++ TR1 headers. - if include and include.group(1).startswith("tr1/"): - error( - filename, - linenum, - "build/c++tr1", - 5, - ("C++ TR1 headers such as <%s> are unapproved.") % include.group(1), - ) - - # Flag unapproved C++11 headers. - if include and include.group(1) in ( - "cfenv", - # "condition_variable", - "fenv.h", - # "future", - # "mutex", - # "thread", - # "chrono", - "ratio", - # "regex", - "system_error", - ): - error( - filename, - linenum, - "build/c++11", - 5, - ("<%s> is an unapproved C++11 header.") % include.group(1), - ) - - # The only place where we need to worry about C++11 keywords and library - # features in preprocessor directives is in macro definitions. - if Match(r"\s*#", line) and not Match(r"\s*#\s*define\b", line): - return +_re_pattern_headers_maybe_templates = [] +for _header, _templates in _HEADERS_MAYBE_TEMPLATES: + for _template in _templates: + # Match max(..., ...), max(..., ...), but not foo->max, foo.max or + # 'type::max()'. 
+ _re_pattern_headers_maybe_templates.append( + (re.compile(r'((\bstd::)|[^>.:])\b' + _template + r'(<.*?>)?\([^\)]'), + _template, + _header)) + +# Map is often overloaded. Only check, if it is fully qualified. +# Match 'std::map(...)', but not 'map(...)'' +_re_pattern_headers_maybe_templates.append( + (re.compile(r'(std\b::\bmap\s*\<)|(^(std\b::\b)map\b\(\s*\<)'), + 'map<>', + '')) - # These are classes and free functions. The classes are always - # mentioned as std::*, but we only catch the free functions if - # they're not found by ADL. They're alphabetical by header. - for top_name in ( - # type_traits - "alignment_of", - "aligned_union", - ): - if Search(r"\bstd::%s\b" % top_name, line): - error( - filename, - linenum, - "build/c++11", - 5, - ( - "std::%s is an unapproved C++11 class or function. Send c-style " - "an example of where it would make your code more readable, and " - "they may let you use it." - ) - % top_name, - ) - - -def FlagCxx14Features(filename, clean_lines, linenum, error): - """Flag those C++14 features that we restrict. +# Other scripts may reach in and modify this pattern. +_re_pattern_templates = [] +for _header, _templates in _HEADERS_CONTAINING_TEMPLATES: + for _template in _templates: + _re_pattern_templates.append( + (re.compile(r'((^|(^|\s|((^|\W)::))std::)|[^>.:]\b)' + _template + r'\s*\<'), + _template + '<>', + _header)) + +_re_pattern_types_or_objs = [] +for _header, _types_or_objs in _HEADERS_TYPES_OR_OBJS: + for _type_or_obj in _types_or_objs: + _re_pattern_types_or_objs.append( + (re.compile(r'\b' + _type_or_obj + r'\b'), + _type_or_obj, + _header)) + +_re_pattern_functions = [] +for _header, _functions in _HEADERS_FUNCTIONS: + for _function in _functions: + # Match printf(..., ...), but not foo->printf, foo.printf or + # 'type::printf()'. + _re_pattern_functions.append( + (re.compile(r'([^>.]|^)\b' + _function + r'\([^\)]'), + _function, + _header)) - Args: - filename: The name of the current file. 
- clean_lines: A CleansedLines instance containing the file. - linenum: The number of the line to check. - error: The function to call with any errors found. - """ +def FilesBelongToSameModule(filename_cc, filename_h): + """Check if these two filenames belong to the same module. + + The concept of a 'module' here is a as follows: + foo.h, foo-inl.h, foo.cc, foo_test.cc and foo_unittest.cc belong to the + same 'module' if they are in the same directory. + some/path/public/xyzzy and some/path/internal/xyzzy are also considered + to belong to the same module here. + + If the filename_cc contains a longer path than the filename_h, for example, + '/absolute/path/to/base/sysinfo.cc', and this file would include + 'base/sysinfo.h', this function also produces the prefix needed to open the + header. This is used by the caller of this function to more robustly open the + header file. We don't have access to the real include paths in this context, + so we need this guesswork here. + + Known bugs: tools/base/bar.cc and base/bar.h belong to the same module + according to this implementation. Because of this, this function gives + some false positives. This should be sufficiently rare in practice. + + Args: + filename_cc: is the path for the source (e.g. .cc) file + filename_h: is the path for the header path + + Returns: + Tuple with a bool and a string: + bool: True if filename_cc and filename_h belong to the same module. + string: the additional prefix needed to open the header file. 
+ """ + fileinfo_cc = FileInfo(filename_cc) + if fileinfo_cc.Extension().lstrip('.') not in GetNonHeaderExtensions(): + return (False, '') + + fileinfo_h = FileInfo(filename_h) + if not IsHeaderExtension(fileinfo_h.Extension().lstrip('.')): + return (False, '') + + filename_cc = filename_cc[:-(len(fileinfo_cc.Extension()))] + matched_test_suffix = re.search(_TEST_FILE_SUFFIX, fileinfo_cc.BaseName()) + if matched_test_suffix: + filename_cc = filename_cc[:-len(matched_test_suffix.group(1))] + + filename_cc = filename_cc.replace('/public/', '/') + filename_cc = filename_cc.replace('/internal/', '/') + + filename_h = filename_h[:-(len(fileinfo_h.Extension()))] + if filename_h.endswith('-inl'): + filename_h = filename_h[:-len('-inl')] + filename_h = filename_h.replace('/public/', '/') + filename_h = filename_h.replace('/internal/', '/') + + files_belong_to_same_module = filename_cc.endswith(filename_h) + common_path = '' + if files_belong_to_same_module: + common_path = filename_cc[:-len(filename_h)] + return files_belong_to_same_module, common_path + + +def CheckForIncludeWhatYouUse(filename, clean_lines, include_state, error, + io=codecs): + """Reports for missing stl includes. + + This function will output warnings to make sure you are including the headers + necessary for the stl containers and functions that you use. We only give one + reason to include a header. For example, if you use both equal_to<> and + less<> in a .h file, only one (the latter in the file) of these will be + reported as a reason to include the . + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + include_state: An _IncludeState instance. + error: The function to call with any errors found. + io: The IO factory to use to read the header file. Provided for unittest + injection. + """ + required = {} # A map of header name to linenumber and the template entity. 
+ # Example of required: { '': (1219, 'less<>') } + + for linenum in range(clean_lines.NumLines()): line = clean_lines.elided[linenum] + if not line or line[0] == '#': + continue + + _re_patterns = [] + _re_patterns.extend(_re_pattern_types_or_objs) + _re_patterns.extend(_re_pattern_functions) + for pattern, item, header in _re_patterns: + matched = pattern.search(line) + if matched: + # Don't warn about strings in non-STL namespaces: + # (We check only the first match per line; good enough.) + prefix = line[:matched.start()] + if prefix.endswith('std::') or not prefix.endswith('::'): + required[header] = (linenum, item) + + for pattern, template, header in _re_pattern_headers_maybe_templates: + if pattern.search(line): + required[header] = (linenum, template) + + # The following function is just a speed up, no semantics are changed. + if '<' not in line: # Reduces the cpu time usage by skipping lines. + continue + + for pattern, template, header in _re_pattern_templates: + matched = pattern.search(line) + if matched: + # Don't warn about IWYU in non-STL namespaces: + # (We check only the first match per line; good enough.) + prefix = line[:matched.start()] + if prefix.endswith('std::') or not prefix.endswith('::'): + required[header] = (linenum, template) + + # Let's flatten the include_state include_list and copy it into a dictionary. + include_dict = dict([item for sublist in include_state.include_list + for item in sublist]) + + # All the lines have been processed, report the errors found. 
+ for required_header_unstripped in sorted(required, key=required.__getitem__): + template = required[required_header_unstripped][1] + if required_header_unstripped.strip('<>"') not in include_dict: + error(filename, required[required_header_unstripped][0], + 'build/include_what_you_use', 4, + 'Add #include ' + required_header_unstripped + ' for ' + template) + + +_RE_PATTERN_EXPLICIT_MAKEPAIR = re.compile(r'\bmake_pair\s*<') - include = Match(r'\s*#\s*include\s+[<"]([^<"]+)[">]', line) - - # Flag unapproved C++14 headers. - if include and include.group(1) in ("scoped_allocator", "shared_mutex"): - error( - filename, - linenum, - "build/c++14", - 5, - ("<%s> is an unapproved C++14 header.") % include.group(1), - ) +def CheckMakePairUsesDeduction(filename, clean_lines, linenum, error): + """Check that make_pair's template arguments are deduced. + + G++ 4.6 in C++11 mode fails badly if make_pair's template arguments are + specified explicitly, and such use isn't intended in any case. + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + error: The function to call with any errors found. + """ + line = clean_lines.elided[linenum] + match = _RE_PATTERN_EXPLICIT_MAKEPAIR.search(line) + if match: + error(filename, linenum, 'build/explicit_make_pair', + 4, # 4 = high confidence + 'For C++11-compatibility, omit template arguments from make_pair' + ' OR use pair directly OR if appropriate, construct a pair directly') -def ProcessFileData(filename, file_extension, lines, error, extra_check_functions=None): - """Performs lint checks and reports any errors to the given error function. - Args: - filename: Filename of the file that is being processed. - file_extension: The extension (dot not included) of the file. - lines: An array of strings, each representing a line of the file, with the - last element being empty if the file is terminated with a newline. 
- error: A callable to which errors are reported, which takes 4 arguments: - filename, line number, error level, and message - extra_check_functions: An array of additional check functions that will be - run on each source line. Each function takes 4 - arguments: filename, clean_lines, line, error - """ - lines = ( - ["// marker so line numbers and indices both start at 1"] - + lines - + ["// marker so line numbers end in a known way"] - ) +def CheckRedundantVirtual(filename, clean_lines, linenum, error): + """Check if line contains a redundant "virtual" function-specifier. + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + error: The function to call with any errors found. + """ + # Look for "virtual" on current line. + line = clean_lines.elided[linenum] + virtual = re.match(r'^(.*)(\bvirtual\b)(.*)$', line) + if not virtual: return + + # Ignore "virtual" keywords that are near access-specifiers. These + # are only used in class base-specifier and do not apply to member + # functions. + if (re.search(r'\b(public|protected|private)\s+$', virtual.group(1)) or + re.match(r'^\s+(public|protected|private)\b', virtual.group(3))): + return + + # Ignore the "virtual" keyword from virtual base classes. Usually + # there is a column on the same line in these cases (virtual base + # classes are rare in google3 because multiple inheritance is rare). + if re.match(r'^.*[^:]:[^:].*$', line): return + + # Look for the next opening parenthesis. This is the start of the + # parameter list (possibly on the next line shortly after virtual). + # TODO(unknown): doesn't work if there are virtual functions with + # decltype() or other things that use parentheses, but csearch suggests + # that this is rare. 
+ end_col = -1 + end_line = -1 + start_col = len(virtual.group(2)) + for start_line in range(linenum, min(linenum + 3, clean_lines.NumLines())): + line = clean_lines.elided[start_line][start_col:] + parameter_list = re.match(r'^([^(]*)\(', line) + if parameter_list: + # Match parentheses to find the end of the parameter list + (_, end_line, end_col) = CloseExpression( + clean_lines, start_line, start_col + len(parameter_list.group(1))) + break + start_col = 0 + + if end_col < 0: + return # Couldn't find end of parameter list, give up + + # Look for "override" or "final" after the parameter list + # (possibly on the next few lines). + for i in range(end_line, min(end_line + 3, clean_lines.NumLines())): + line = clean_lines.elided[i][end_col:] + match = re.search(r'\b(override|final)\b', line) + if match: + error(filename, linenum, 'readability/inheritance', 4, + ('"virtual" is redundant since function is ' + f'already declared as "{match.group(1)}"')) - include_state = _IncludeState() - function_state = _FunctionState() - nesting_state = NestingState() + # Set end_col to check whole lines after we are done with the + # first line. + end_col = 0 + if re.search(r'[^\w]\s*$', line): + break - ResetNolintSuppressions() - CheckForCopyright(filename, lines, error) - ProcessGlobalSuppresions(lines) - RemoveMultiLineComments(filename, lines, error) - clean_lines = CleansedLines(lines) +def CheckRedundantOverrideOrFinal(filename, clean_lines, linenum, error): + """Check if line contains a redundant "override" or "final" virt-specifier. + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + error: The function to call with any errors found. + """ + # Look for closing parenthesis nearby. We need one to confirm where + # the declarator ends and where the virt-specifier starts to avoid + # false positives. 
+ line = clean_lines.elided[linenum] + declarator_end = line.rfind(')') + if declarator_end >= 0: + fragment = line[declarator_end:] + else: + if linenum > 1 and clean_lines.elided[linenum - 1].rfind(')') >= 0: + fragment = line + else: + return - if IsHeaderExtension(file_extension): - CheckForHeaderGuard(filename, clean_lines, error) + # Check that at most one of "override" or "final" is present, not both + if re.search(r'\boverride\b', fragment) and re.search(r'\bfinal\b', fragment): + error(filename, linenum, 'readability/inheritance', 4, + ('"override" is redundant since function is ' + 'already declared as "final"')) - for line in xrange(clean_lines.NumLines()): - ProcessLine( - filename, - file_extension, - clean_lines, - line, - include_state, - function_state, - nesting_state, - error, - extra_check_functions, - ) - FlagCxx11Features(filename, clean_lines, line, error) - nesting_state.CheckCompletedBlocks(filename, error) - CheckForIncludeWhatYouUse(filename, clean_lines, include_state, error) - # Check that the .cc file has included its header if it exists. - if _IsSourceExtension(file_extension): - CheckHeaderFileIncluded(filename, include_state, error) - # We check here rather than inside ProcessLine so that we see raw - # lines rather than "cleaned" lines. - CheckForBadCharacters(filename, lines, error) +# Returns true if we are at a new block, and it is directly +# inside of a namespace. +def IsBlockInNameSpace(nesting_state, is_forward_declaration): + """Checks that the new block is directly in a namespace. + + Args: + nesting_state: The _NestingState object that contains info about our state. + is_forward_declaration: If the class is a forward declared class. + Returns: + Whether or not the new block is directly in a namespace. 
+ """ + if is_forward_declaration: + return len(nesting_state.stack) >= 1 and ( + isinstance(nesting_state.stack[-1], _NamespaceInfo)) + + if len(nesting_state.stack) >= 1: + if isinstance(nesting_state.stack[-1], _NamespaceInfo): + return True + elif (len(nesting_state.stack) > 1 and + isinstance(nesting_state.previous_stack_top, _NamespaceInfo) and + isinstance(nesting_state.stack[-2], _NamespaceInfo)): + return True + return False + + +def ShouldCheckNamespaceIndentation(nesting_state, is_namespace_indent_item, + raw_lines_no_comments, linenum): + """This method determines if we should apply our namespace indentation check. + + Args: + nesting_state: The current nesting state. + is_namespace_indent_item: If we just put a new class on the stack, True. + If the top of the stack is not a class, or we did not recently + add the class, False. + raw_lines_no_comments: The lines without the comments. + linenum: The current line number we are processing. + + Returns: + True if we should apply our namespace indentation check. Currently, it + only works for classes and namespaces inside of a namespace. + """ + + is_forward_declaration = IsForwardClassDeclaration(raw_lines_no_comments, + linenum) + + if not (is_namespace_indent_item or is_forward_declaration): + return False - CheckForNewlineAtEOF(filename, lines, error) + # If we are in a macro, we do not want to check the namespace indentation. + if IsMacroDefinition(raw_lines_no_comments, linenum): + return False + return IsBlockInNameSpace(nesting_state, is_forward_declaration) -def ProcessConfigOverrides(filename): - """Loads the configuration files and processes the config overrides. - Args: - filename: The name of the file being processed by the linter. +# Call this method if the line is directly inside of a namespace. +# If the line above is blank (excluding comments) or the start of +# an inner namespace, it cannot be indented. 
+def CheckItemIndentationInNamespace(filename, raw_lines_no_comments, linenum, + error): + # JMM: clang-format and cpplint disagree what "indenting in a + # namespace means + return + line = raw_lines_no_comments[linenum] + if re.match(r'^\s+', line): + error(filename, linenum, 'whitespace/indent_namespace', 4, + 'Do not indent within a namespace.') + + +def ProcessLine(filename, file_extension, clean_lines, line, + include_state, function_state, nesting_state, error, + extra_check_functions=None): + """Processes a single line in the file. + + Args: + filename: Filename of the file that is being processed. + file_extension: The extension (dot not included) of the file. + clean_lines: An array of strings, each representing a line of the file, + with comments stripped. + line: Number of line being processed. + include_state: An _IncludeState instance in which the headers are inserted. + function_state: A _FunctionState instance which counts function lines, etc. + nesting_state: A NestingState instance which maintains information about + the current stack of nested blocks being parsed. + error: A callable to which errors are reported, which takes 4 arguments: + filename, line number, error level, and message + extra_check_functions: An array of additional check functions that will be + run on each source line. 
Each function takes 4 + arguments: filename, clean_lines, line, error + """ + raw_lines = clean_lines.raw_lines + ParseNolintSuppressions(filename, raw_lines[line], line, error) + nesting_state.Update(filename, clean_lines, line, error) + CheckForNamespaceIndentation(filename, nesting_state, clean_lines, line, + error) + if nesting_state.InAsmBlock(): return + CheckForFunctionLengths(filename, clean_lines, line, function_state, error) + CheckForMultilineCommentsAndStrings(filename, clean_lines, line, error) + CheckStyle(filename, clean_lines, line, file_extension, nesting_state, error) + CheckLanguage(filename, clean_lines, line, file_extension, include_state, + nesting_state, error) + CheckForNonConstReference(filename, clean_lines, line, nesting_state, error) + CheckForNonStandardConstructs(filename, clean_lines, line, + nesting_state, error) + CheckVlogArguments(filename, clean_lines, line, error) + CheckPosixThreading(filename, clean_lines, line, error) + CheckInvalidIncrement(filename, clean_lines, line, error) + CheckMakePairUsesDeduction(filename, clean_lines, line, error) + CheckRedundantVirtual(filename, clean_lines, line, error) + CheckRedundantOverrideOrFinal(filename, clean_lines, line, error) + if extra_check_functions: + for check_fn in extra_check_functions: + check_fn(filename, clean_lines, line, error) + + +def FlagCxxHeaders(filename, clean_lines, linenum, error): + """Flag C++ headers that the styleguide restricts. + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + error: The function to call with any errors found. + """ + line = clean_lines.elided[linenum] + + include = re.match(r'\s*#\s*include\s+[<"]([^<"]+)[">]', line) + + # Flag unapproved C++11 headers. 
+ if include and include.group(1) in ('cfenv', + 'fenv.h', + 'ratio', + ): + error(filename, linenum, 'build/c++11', 5, + f"<{include.group(1)}> is an unapproved C++11 header.") + + # filesystem is the only unapproved C++17 header + if include and include.group(1) == 'filesystem': + error(filename, linenum, 'build/c++17', 5, + " is an unapproved C++17 header.") + + +def ProcessFileData(filename, file_extension, lines, error, + extra_check_functions=None): + """Performs lint checks and reports any errors to the given error function. + + Args: + filename: Filename of the file that is being processed. + file_extension: The extension (dot not included) of the file. + lines: An array of strings, each representing a line of the file, with the + last element being empty if the file is terminated with a newline. + error: A callable to which errors are reported, which takes 4 arguments: + filename, line number, error level, and message + extra_check_functions: An array of additional check functions that will be + run on each source line. 
Each function takes 4 + arguments: filename, clean_lines, line, error + """ + lines = (['// marker so line numbers and indices both start at 1'] + lines + + ['// marker so line numbers end in a known way']) + + include_state = _IncludeState() + function_state = _FunctionState() + nesting_state = NestingState() + + ResetNolintSuppressions() + + CheckForCopyright(filename, lines, error) + ProcessGlobalSuppressions(lines) + RemoveMultiLineComments(filename, lines, error) + clean_lines = CleansedLines(lines) + + if IsHeaderExtension(file_extension): + CheckForHeaderGuard(filename, clean_lines, error) + + for line in range(clean_lines.NumLines()): + ProcessLine(filename, file_extension, clean_lines, line, + include_state, function_state, nesting_state, error, + extra_check_functions) + FlagCxxHeaders(filename, clean_lines, line, error) + if _error_suppressions.HasOpenBlock(): + error(filename, _error_suppressions.GetOpenBlockStart(), 'readability/nolint', 5, + 'NONLINT block never ended') + + CheckForIncludeWhatYouUse(filename, clean_lines, include_state, error) + + # Check that the .cc file has included its header if it exists. + if _IsSourceExtension(file_extension): + CheckHeaderFileIncluded(filename, include_state, error) + + # We check here rather than inside ProcessLine so that we see raw + # lines rather than "cleaned" lines. + CheckForBadCharacters(filename, lines, error) + + CheckForNewlineAtEOF(filename, lines, error) - Returns: - False if the current |filename| should not be processed further. - """ +def ProcessConfigOverrides(filename): + """ Loads the configuration files and processes the config overrides. - abs_filename = os.path.abspath(filename) - cfg_filters = [] - keep_looking = True - while keep_looking: - abs_path, base_name = os.path.split(abs_filename) - if not base_name: - break # Reached the root directory. 
- - cfg_file = os.path.join(abs_path, "CPPLINT.cfg") - abs_filename = abs_path - if not os.path.isfile(cfg_file): - continue + Args: + filename: The name of the file being processed by the linter. - try: - with open(cfg_file) as file_handle: - for line in file_handle: - line, _, _ = line.partition("#") # Remove comments. - if not line.strip(): - continue - - name, _, val = line.partition("=") - name = name.strip() - val = val.strip() - if name == "set noparent": - keep_looking = False - elif name == "filter": - cfg_filters.append(val) - elif name == "exclude_files": - # When matching exclude_files pattern, use the base_name of - # the current file name or the directory name we are processing. - # For example, if we are checking for lint errors in /foo/bar/baz.cc - # and we found the .cfg file at /foo/CPPLINT.cfg, then the config - # file's "exclude_files" filter is meant to be checked against "bar" - # and not "baz" nor "bar/baz.cc". - if base_name: - pattern = re.compile(val) - if pattern.match(base_name): - if _cpplint_state.quiet: - # Suppress "Ignoring file" warning when using --quiet. - return False - _cpplint_state.PrintInfo( - 'Ignoring "%s": file excluded by "%s". ' - 'File path component "%s" matches ' - 'pattern "%s"\n' - % (filename, cfg_file, base_name, val) - ) - return False - elif name == "linelength": - global _line_length - try: - _line_length = int(val) - except ValueError: - _cpplint_state.PrintError("Line length must be numeric.") - elif name == "extensions": - ProcessExtensionsOption(val) - elif name == "root": - global _root - # root directories are specified relative to CPPLINT.cfg dir. 
- _root = os.path.join(os.path.dirname(cfg_file), val) - elif name == "headers": - ProcessHppHeadersOption(val) - else: - _cpplint_state.PrintError( - "Invalid configuration option (%s) in file %s\n" - % (name, cfg_file) - ) - - except IOError: - _cpplint_state.PrintError( - "Skipping config file '%s': Can't open for reading\n" % cfg_file - ) - keep_looking = False + Returns: + False if the current |filename| should not be processed further. + """ - # Apply all the accumulated filters in reverse order (top-level directory - # config options having the least priority). - for cfg_filter in reversed(cfg_filters): - _AddFilters(cfg_filter) + abs_filename = os.path.abspath(filename) + cfg_filters = [] + keep_looking = True + while keep_looking: + abs_path, base_name = os.path.split(abs_filename) + if not base_name: + break # Reached the root directory. - return True + cfg_file = os.path.join(abs_path, _config_filename) + abs_filename = abs_path + if not os.path.isfile(cfg_file): + continue + try: + with codecs.open(cfg_file, 'r', 'utf8', 'replace') as file_handle: + for line in file_handle: + line, _, _ = line.partition('#') # Remove comments. + if not line.strip(): + continue -def ProcessFile(filename, vlevel, extra_check_functions=None): - """Does google-lint on a single file. + name, _, val = line.partition('=') + name = name.strip() + val = val.strip() + if name == 'set noparent': + keep_looking = False + elif name == 'filter': + cfg_filters.append(val) + elif name == 'exclude_files': + # When matching exclude_files pattern, use the base_name of + # the current file name or the directory name we are processing. + # For example, if we are checking for lint errors in /foo/bar/baz.cc + # and we found the .cfg file at /foo/CPPLINT.cfg, then the config + # file's "exclude_files" filter is meant to be checked against "bar" + # and not "baz" nor "bar/baz.cc". 
+ if base_name: + pattern = re.compile(val) + if pattern.match(base_name): + if _cpplint_state.quiet: + # Suppress "Ignoring file" warning when using --quiet. + return False + _cpplint_state.PrintInfo(f'Ignoring "{filename}": file excluded by "{cfg_file}". ' + 'File path component "%s" matches ' + 'pattern "%s"\n' % + (base_name, val)) + return False + elif name == 'linelength': + global _line_length + try: + _line_length = int(val) + except ValueError: + _cpplint_state.PrintError('Line length must be numeric.') + elif name == 'extensions': + ProcessExtensionsOption(val) + elif name == 'root': + global _root + # root directories are specified relative to CPPLINT.cfg dir. + _root = os.path.join(os.path.dirname(cfg_file), val) + elif name == 'headers': + ProcessHppHeadersOption(val) + elif name == 'includeorder': + ProcessIncludeOrderOption(val) + else: + _cpplint_state.PrintError( + f'Invalid configuration option ({name}) in file {cfg_file}\n') - Args: - filename: The name of the file to parse. + except IOError: + _cpplint_state.PrintError( + f"Skipping config file '{cfg_file}': Can't open for reading\n") + keep_looking = False - vlevel: The level of errors to report. Every error of confidence - >= verbose_level will be reported. 0 is a good default. + # Apply all the accumulated filters in reverse order (top-level directory + # config options having the least priority). + for cfg_filter in reversed(cfg_filters): + _AddFilters(cfg_filter) - extra_check_functions: An array of additional check functions that will be - run on each source line. Each function takes 4 - arguments: filename, clean_lines, line, error - """ + return True - _SetVerboseLevel(vlevel) - _BackupFilters() - old_errors = _cpplint_state.error_count - if not ProcessConfigOverrides(filename): - _RestoreFilters() - return +def ProcessFile(filename, vlevel, extra_check_functions=None): + """Does google-lint on a single file. 
- lf_lines = [] - crlf_lines = [] - try: - # Support the UNIX convention of using "-" for stdin. Note that - # we are not opening the file with universal newline support - # (which codecs doesn't support anyway), so the resulting lines do - # contain trailing '\r' characters if we are reading a file that - # has CRLF endings. - # If after the split a trailing '\r' is present, it is removed - # below. - if filename == "-": - lines = ( - codecs.StreamReaderWriter( - sys.stdin, - codecs.getreader("utf8"), - codecs.getwriter("utf8"), - "replace", - ) - .read() - .split("\n") - ) - else: - lines = codecs.open(filename, "r", "utf8", "replace").read().split("\n") + Args: + filename: The name of the file to parse. - # Remove trailing '\r'. - # The -1 accounts for the extra trailing blank line we get from split() - for linenum in range(len(lines) - 1): - if lines[linenum].endswith("\r"): - lines[linenum] = lines[linenum].rstrip("\r") - crlf_lines.append(linenum + 1) - else: - lf_lines.append(linenum + 1) + vlevel: The level of errors to report. Every error of confidence + >= verbose_level will be reported. 0 is a good default. - except IOError: - _cpplint_state.PrintError( - "Skipping input '%s': Can't open for reading\n" % filename - ) - _RestoreFilters() - return + extra_check_functions: An array of additional check functions that will be + run on each source line. Each function takes 4 + arguments: filename, clean_lines, line, error + """ - # Note, if no dot is found, this will give the entire filename as the ext. - file_extension = filename[filename.rfind(".") + 1 :] + _SetVerboseLevel(vlevel) + _BackupFilters() + old_errors = _cpplint_state.error_count - # When reading from stdin, the extension is unknown, so no cpplint tests - # should rely on the extension. 
- if filename != "-" and file_extension not in GetAllExtensions(): - _cpplint_state.PrintError( - "Ignoring %s; not a valid file name " - "(%s)\n" % (filename, ", ".join(GetAllExtensions())) - ) + if not ProcessConfigOverrides(filename): + _RestoreFilters() + return + + lf_lines = [] + crlf_lines = [] + try: + # Support the UNIX convention of using "-" for stdin. Note that + # we are not opening the file with universal newline support + # (which codecs doesn't support anyway), so the resulting lines do + # contain trailing '\r' characters if we are reading a file that + # has CRLF endings. + # If after the split a trailing '\r' is present, it is removed + # below. + if filename == '-': + lines = codecs.StreamReaderWriter(sys.stdin, + codecs.getreader('utf8'), + codecs.getwriter('utf8'), + 'replace').read().split('\n') else: - ProcessFileData(filename, file_extension, lines, Error, extra_check_functions) - - # If end-of-line sequences are a mix of LF and CR-LF, issue - # warnings on the lines with CR. - # - # Don't issue any warnings if all lines are uniformly LF or CR-LF, - # since critique can handle these just fine, and the style guide - # doesn't dictate a particular end of line sequence. - # - # We can't depend on os.linesep to determine what the desired - # end-of-line sequence should be, since that will return the - # server-side end-of-line sequence. - if lf_lines and crlf_lines: - # Warn on every line with CR. An alternative approach might be to - # check whether the file is mostly CRLF or just LF, and warn on the - # minority, we bias toward LF here since most tools prefer LF. - for linenum in crlf_lines: - Error( - filename, - linenum, - "whitespace/newline", - 1, - "Unexpected \\r (^M) found; better to use only \\n", - ) - - # Suppress printing anything if --quiet was passed unless the error - # count has increased after processing this file. 
- if not _cpplint_state.quiet or old_errors != _cpplint_state.error_count: - _cpplint_state.PrintInfo("Done processing %s\n" % filename) + with codecs.open(filename, 'r', 'utf8', 'replace') as target_file: + lines = target_file.read().split('\n') + + # Remove trailing '\r'. + # The -1 accounts for the extra trailing blank line we get from split() + for linenum in range(len(lines) - 1): + if lines[linenum].endswith('\r'): + lines[linenum] = lines[linenum].rstrip('\r') + crlf_lines.append(linenum + 1) + else: + lf_lines.append(linenum + 1) + + except IOError: + # TODO: Maybe make this have an exit code of 2 after all is done + _cpplint_state.PrintError( + f"Skipping input '{filename}': Can't open for reading\n") _RestoreFilters() + return + + # Note, if no dot is found, this will give the entire filename as the ext. + file_extension = filename[filename.rfind('.') + 1:] + + # When reading from stdin, the extension is unknown, so no cpplint tests + # should rely on the extension. + if filename != '-' and file_extension not in GetAllExtensions(): + _cpplint_state.PrintError(f'Ignoring {filename}; not a valid file name' + f' ({(", ".join(GetAllExtensions()))})\n') + else: + ProcessFileData(filename, file_extension, lines, Error, + extra_check_functions) + + # If end-of-line sequences are a mix of LF and CR-LF, issue + # warnings on the lines with CR. + # + # Don't issue any warnings if all lines are uniformly LF or CR-LF, + # since critique can handle these just fine, and the style guide + # doesn't dictate a particular end of line sequence. + # + # We can't depend on os.linesep to determine what the desired + # end-of-line sequence should be, since that will return the + # server-side end-of-line sequence. + if lf_lines and crlf_lines: + # Warn on every line with CR. An alternative approach might be to + # check whether the file is mostly CRLF or just LF, and warn on the + # minority, we bias toward LF here since most tools prefer LF. 
+ for linenum in crlf_lines: + Error(filename, linenum, 'whitespace/newline', 1, + 'Unexpected \\r (^M) found; better to use only \\n') + + # Suppress printing anything if --quiet was passed unless the error + # count has increased after processing this file. + if not _cpplint_state.quiet or old_errors != _cpplint_state.error_count: + _cpplint_state.PrintInfo(f'Done processing {filename}\n') + _RestoreFilters() def PrintUsage(message): - """Prints a brief usage string and exits, optionally with an error message. - - Args: - message: The optional error message. - """ - sys.stderr.write( - _USAGE - % ( - list(GetAllExtensions()), - ",".join(list(GetAllExtensions())), - GetHeaderExtensions(), - ",".join(GetHeaderExtensions()), - ) - ) - - if message: - sys.exit("\nFATAL ERROR: " + message) - else: - sys.exit(0) - - -def PrintVersion(): - sys.stdout.write("Cpplint fork (https://github.com/cpplint/cpplint)\n") - sys.stdout.write("cpplint " + __VERSION__ + "\n") - sys.stdout.write("Python " + sys.version + "\n") + """Prints a brief usage string and exits, optionally with an error message. + + Args: + message: The optional error message. + """ + sys.stderr.write(_USAGE % (sorted(list(GetAllExtensions())), + ','.join(sorted(list(GetAllExtensions()))), + sorted(GetHeaderExtensions()), + ','.join(sorted(GetHeaderExtensions())))) + + if message: + sys.exit('\nFATAL ERROR: ' + message) + else: sys.exit(0) +def PrintVersion(): + sys.stdout.write('Cpplint fork (https://github.com/cpplint/cpplint)\n') + sys.stdout.write('cpplint ' + __VERSION__ + '\n') + sys.stdout.write('Python ' + sys.version + '\n') + sys.exit(0) def PrintCategories(): - """Prints a list of all the error-categories used by error messages. + """Prints a list of all the error-categories used by error messages. - These are the categories used to filter messages via --filter. 
- """ - sys.stderr.write("".join(" %s\n" % cat for cat in _ERROR_CATEGORIES)) - sys.exit(0) + These are the categories used to filter messages via --filter. + """ + sys.stderr.write(''.join(f' {cat}\n' for cat in _ERROR_CATEGORIES)) + sys.exit(0) def ParseArguments(args): - """Parses the command line arguments. - - This may set the output format and verbosity level as side-effects. - - Args: - args: The command line arguments: - - Returns: - The list of filenames to lint. - """ - try: - (opts, filenames) = getopt.getopt( - args, - "", - [ - "help", - "output=", - "verbose=", - "v=", - "version", - "counting=", - "filter=", - "root=", - "repository=", - "linelength=", - "extensions=", - "exclude=", - "recursive", - "headers=", - "quiet", - ], - ) - except getopt.GetoptError: - PrintUsage("Invalid arguments.") - - verbosity = _VerboseLevel() - output_format = _OutputFormat() - filters = "" - quiet = _Quiet() - counting_style = "" - recursive = False - - for opt, val in opts: - if opt == "--help": - PrintUsage(None) - if opt == "--version": - PrintVersion() - elif opt == "--output": - if val not in ("emacs", "vs7", "eclipse", "junit"): - PrintUsage( - "The only allowed output formats are emacs, vs7, eclipse " - "and junit." 
- ) - output_format = val - elif opt == "--quiet": - quiet = True - elif opt == "--verbose" or opt == "--v": - verbosity = int(val) - elif opt == "--filter": - filters = val - if not filters: - PrintCategories() - elif opt == "--counting": - if val not in ("total", "toplevel", "detailed"): - PrintUsage("Valid counting options are total, toplevel, and detailed") - counting_style = val - elif opt == "--root": - global _root - _root = val - elif opt == "--repository": - global _repository - _repository = val - elif opt == "--linelength": - global _line_length - try: - _line_length = int(val) - except ValueError: - PrintUsage("Line length must be digits.") - elif opt == "--exclude": - global _excludes - if not _excludes: - _excludes = set() - _excludes.update(glob.glob(val)) - elif opt == "--extensions": - ProcessExtensionsOption(val) - elif opt == "--headers": - ProcessHppHeadersOption(val) - elif opt == "--recursive": - recursive = True - - if not filenames: - PrintUsage("No files were specified.") - - if recursive: - filenames = _ExpandDirectories(filenames) - - if _excludes: - filenames = _FilterExcludedFiles(filenames) - - _SetOutputFormat(output_format) - _SetQuiet(quiet) - _SetVerboseLevel(verbosity) - _SetFilters(filters) - _SetCountingStyle(counting_style) - - return filenames - + """Parses the command line arguments. + + This may set the output format and verbosity level as side-effects. + + Args: + args: The command line arguments: + + Returns: + The list of filenames to lint. 
+ """ + try: + (opts, filenames) = getopt.getopt(args, '', ['help', 'output=', 'verbose=', + 'v=', + 'version', + 'counting=', + 'filter=', + 'root=', + 'repository=', + 'linelength=', + 'extensions=', + 'exclude=', + 'recursive', + 'headers=', + 'includeorder=', + 'config=', + 'quiet']) + except getopt.GetoptError: + PrintUsage('Invalid arguments.') + + verbosity = _VerboseLevel() + output_format = _OutputFormat() + filters = '' + quiet = _Quiet() + counting_style = '' + recursive = False + + for (opt, val) in opts: + if opt == '--help': + PrintUsage(None) + if opt == '--version': + PrintVersion() + elif opt == '--output': + if val not in ('emacs', 'vs7', 'eclipse', 'junit', 'sed', 'gsed'): + PrintUsage('The only allowed output formats are emacs, vs7, eclipse ' + 'sed, gsed and junit.') + output_format = val + elif opt == '--quiet': + quiet = True + elif opt == '--verbose' or opt == '--v': + verbosity = int(val) + elif opt == '--filter': + filters = val + if not filters: + PrintCategories() + elif opt == '--counting': + if val not in ('total', 'toplevel', 'detailed'): + PrintUsage('Valid counting options are total, toplevel, and detailed') + counting_style = val + elif opt == '--root': + global _root + _root = val + elif opt == '--repository': + global _repository + _repository = val + elif opt == '--linelength': + global _line_length + try: + _line_length = int(val) + except ValueError: + PrintUsage('Line length must be digits.') + elif opt == '--exclude': + global _excludes + if not _excludes: + _excludes = set() + _excludes.update(glob.glob(val)) + elif opt == '--extensions': + ProcessExtensionsOption(val) + elif opt == '--headers': + ProcessHppHeadersOption(val) + elif opt == '--recursive': + recursive = True + elif opt == '--includeorder': + ProcessIncludeOrderOption(val) + elif opt == '--config': + global _config_filename + _config_filename = val + if os.path.basename(_config_filename) != _config_filename: + PrintUsage('Config file name must not include 
directory components.') + + if not filenames: + PrintUsage('No files were specified.') + + if recursive: + filenames = _ExpandDirectories(filenames) + + if _excludes: + filenames = _FilterExcludedFiles(filenames) + + _SetOutputFormat(output_format) + _SetQuiet(quiet) + _SetVerboseLevel(verbosity) + _SetFilters(filters) + _SetCountingStyle(counting_style) + + filenames.sort() + return filenames + +def _ParseFilterSelector(parameter): + """Parses the given command line parameter for file- and line-specific + exclusions. + readability/casting:file.cpp + readability/casting:file.cpp:43 + + Args: + parameter: The parameter value of --filter + + Returns: + [category, filename, line]. + Category is always given. + Filename is either a filename or empty if all files are meant. + Line is either a line in filename or -1 if all lines are meant. + """ + colon_pos = parameter.find(":") + if colon_pos == -1: + return parameter, "", -1 + category = parameter[:colon_pos] + second_colon_pos = parameter.find(":", colon_pos + 1) + if second_colon_pos == -1: + return category, parameter[colon_pos + 1:], -1 + else: + return category, parameter[colon_pos + 1: second_colon_pos], \ + int(parameter[second_colon_pos + 1:]) def _ExpandDirectories(filenames): - """Searches a list of filenames and replaces directories in the list with - all files descending from those directories. Files with extensions not in - the valid extensions list are excluded. - - Args: - filenames: A list of files or directories - - Returns: - A list of all files that are members of filenames or descended from a - directory in filenames - """ - expanded = set() - for filename in filenames: - if not os.path.isdir(filename): - expanded.add(filename) - continue - - for root, _, files in os.walk(filename): - for loopfile in files: - fullname = os.path.join(root, loopfile) - if fullname.startswith("." + os.path.sep): - fullname = fullname[len("." 
+ os.path.sep) :] - expanded.add(fullname) - - filtered = [] - for filename in expanded: - if os.path.splitext(filename)[1][1:] in GetAllExtensions(): - filtered.append(filename) - return filtered - + """Searches a list of filenames and replaces directories in the list with + all files descending from those directories. Files with extensions not in + the valid extensions list are excluded. + + Args: + filenames: A list of files or directories + + Returns: + A list of all files that are members of filenames or descended from a + directory in filenames + """ + expanded = set() + for filename in filenames: + if not os.path.isdir(filename): + expanded.add(filename) + continue + + for root, _, files in os.walk(filename): + for loopfile in files: + fullname = os.path.join(root, loopfile) + if fullname.startswith('.' + os.path.sep): + fullname = fullname[len('.' + os.path.sep):] + expanded.add(fullname) + + filtered = [] + for filename in expanded: + if os.path.splitext(filename)[1][1:] in GetAllExtensions(): + filtered.append(filename) + return filtered def _FilterExcludedFiles(fnames): - """Filters out files listed in the --exclude command line switch. File paths - in the switch are evaluated relative to the current working directory - """ - exclude_paths = [os.path.abspath(f) for f in _excludes] - # because globbing does not work recursively, exclude all subpath of all excluded entries - return [ - f - for f in fnames - if not any(e for e in exclude_paths if _IsParentOrSame(e, os.path.abspath(f))) - ] - + """Filters out files listed in the --exclude command line switch. 
File paths + in the switch are evaluated relative to the current working directory + """ + exclude_paths = [os.path.abspath(f) for f in _excludes] + # because globbing does not work recursively, exclude all subpath of all excluded entries + return [f for f in fnames + if not any(e for e in exclude_paths + if _IsParentOrSame(e, os.path.abspath(f)))] def _IsParentOrSame(parent, child): - """Return true if child is subdirectory of parent. - Assumes both paths are absolute and don't contain symlinks. - """ - parent = os.path.normpath(parent) - child = os.path.normpath(child) - if parent == child: - return True - - prefix = os.path.commonprefix([parent, child]) - if prefix != parent: - return False - # Note: os.path.commonprefix operates on character basis, so - # take extra care of situations like '/foo/ba' and '/foo/bar/baz' - child_suffix = child[len(prefix) :] - child_suffix = child_suffix.lstrip(os.sep) - return child == os.path.join(prefix, child_suffix) + """Return true if child is subdirectory of parent. + Assumes both paths are absolute and don't contain symlinks. + """ + parent = os.path.normpath(parent) + child = os.path.normpath(child) + if parent == child: + return True + prefix = os.path.commonprefix([parent, child]) + if prefix != parent: + return False + # Note: os.path.commonprefix operates on character basis, so + # take extra care of situations like '/foo/ba' and '/foo/bar/baz' + child_suffix = child[len(prefix):] + child_suffix = child_suffix.lstrip(os.sep) + return child == os.path.join(prefix, child_suffix) def main(): - filenames = ParseArguments(sys.argv[1:]) - backup_err = sys.stderr - try: - # Change stderr to write with replacement characters so we don't die - # if we try to print something containing non-ASCII characters. 
- sys.stderr = codecs.StreamReader(sys.stderr, "replace") - - _cpplint_state.ResetErrorCounts() - for filename in filenames: - ProcessFile(filename, _cpplint_state.verbose_level) - # If --quiet is passed, suppress printing error count unless there are errors. - if not _cpplint_state.quiet or _cpplint_state.error_count > 0: - _cpplint_state.PrintErrorCounts() + filenames = ParseArguments(sys.argv[1:]) + backup_err = sys.stderr + try: + # Change stderr to write with replacement characters so we don't die + # if we try to print something containing non-ASCII characters. + sys.stderr = codecs.StreamReader(sys.stderr, 'replace') + + _cpplint_state.ResetErrorCounts() + for filename in filenames: + ProcessFile(filename, _cpplint_state.verbose_level) + # If --quiet is passed, suppress printing error count unless there are errors. + if not _cpplint_state.quiet or _cpplint_state.error_count > 0: + _cpplint_state.PrintErrorCounts() - if _cpplint_state.output_format == "junit": - sys.stderr.write(_cpplint_state.FormatJUnitXML()) + if _cpplint_state.output_format == 'junit': + sys.stderr.write(_cpplint_state.FormatJUnitXML()) - finally: - sys.stderr = backup_err + finally: + sys.stderr = backup_err - sys.exit(_cpplint_state.error_count > 0) + sys.exit(_cpplint_state.error_count > 0) -if __name__ == "__main__": - main() +if __name__ == '__main__': + main() diff --git a/tst/unit/test_swarm.cpp b/tst/unit/test_swarm.cpp index bcff16106110..9dc07ba3c270 100644 --- a/tst/unit/test_swarm.cpp +++ b/tst/unit/test_swarm.cpp @@ -19,8 +19,10 @@ #include #include +#include #include #include +#include #include From cc3aabba5de2f6c507bdc5c6b527e2ec27626d7d Mon Sep 17 00:00:00 2001 From: Jonah Miller Date: Fri, 13 Sep 2024 13:48:22 -0600 Subject: [PATCH 30/37] format cpplint with black... derp... 
--- tst/style/cpplint.py | 12891 ++++++++++++++++++++++------------------- 1 file changed, 6979 insertions(+), 5912 deletions(-) diff --git a/tst/style/cpplint.py b/tst/style/cpplint.py index f513e8fbaec0..0ceed165b7c3 100755 --- a/tst/style/cpplint.py +++ b/tst/style/cpplint.py @@ -59,7 +59,7 @@ # if empty, use defaults _valid_extensions = set([]) -__VERSION__ = '1.7' +__VERSION__ = "1.7" _USAGE = """ Syntax: cpplint.py [--verbose=#] [--output=emacs|eclipse|vs7|junit|sed|gsed] @@ -293,513 +293,515 @@ # If you add a new error message with a new category, add it to the list # here! cpplint_unittest.py should tell you if you forget to do this. _ERROR_CATEGORIES = [ - 'build/c++11', - 'build/c++17', - 'build/deprecated', - 'build/endif_comment', - 'build/explicit_make_pair', - 'build/forward_decl', - 'build/header_guard', - 'build/include', - 'build/include_subdir', - 'build/include_alpha', - 'build/include_order', - 'build/include_what_you_use', - 'build/namespaces_headers', - 'build/namespaces_literals', - 'build/namespaces', - 'build/printf_format', - 'build/storage_class', - 'legal/copyright', - 'readability/alt_tokens', - 'readability/braces', - 'readability/casting', - 'readability/check', - 'readability/constructors', - 'readability/fn_size', - 'readability/inheritance', - 'readability/multiline_comment', - 'readability/multiline_string', - 'readability/namespace', - 'readability/nolint', - 'readability/nul', - 'readability/strings', - 'readability/todo', - 'readability/utf8', - 'runtime/arrays', - 'runtime/casting', - 'runtime/explicit', - 'runtime/int', - 'runtime/init', - 'runtime/invalid_increment', - 'runtime/member_string_references', - 'runtime/memset', - 'runtime/operator', - 'runtime/printf', - 'runtime/printf_format', - 'runtime/references', - 'runtime/string', - 'runtime/threadsafe_fn', - 'runtime/vlog', - 'whitespace/blank_line', - 'whitespace/braces', - 'whitespace/comma', - 'whitespace/comments', - 'whitespace/empty_conditional_body', - 
'whitespace/empty_if_body', - 'whitespace/empty_loop_body', - 'whitespace/end_of_line', - 'whitespace/ending_newline', - 'whitespace/forcolon', - 'whitespace/indent', - 'whitespace/indent_namespace', - 'whitespace/line_length', - 'whitespace/newline', - 'whitespace/operators', - 'whitespace/parens', - 'whitespace/semicolon', - 'whitespace/tab', - 'whitespace/todo', - ] + "build/c++11", + "build/c++17", + "build/deprecated", + "build/endif_comment", + "build/explicit_make_pair", + "build/forward_decl", + "build/header_guard", + "build/include", + "build/include_subdir", + "build/include_alpha", + "build/include_order", + "build/include_what_you_use", + "build/namespaces_headers", + "build/namespaces_literals", + "build/namespaces", + "build/printf_format", + "build/storage_class", + "legal/copyright", + "readability/alt_tokens", + "readability/braces", + "readability/casting", + "readability/check", + "readability/constructors", + "readability/fn_size", + "readability/inheritance", + "readability/multiline_comment", + "readability/multiline_string", + "readability/namespace", + "readability/nolint", + "readability/nul", + "readability/strings", + "readability/todo", + "readability/utf8", + "runtime/arrays", + "runtime/casting", + "runtime/explicit", + "runtime/int", + "runtime/init", + "runtime/invalid_increment", + "runtime/member_string_references", + "runtime/memset", + "runtime/operator", + "runtime/printf", + "runtime/printf_format", + "runtime/references", + "runtime/string", + "runtime/threadsafe_fn", + "runtime/vlog", + "whitespace/blank_line", + "whitespace/braces", + "whitespace/comma", + "whitespace/comments", + "whitespace/empty_conditional_body", + "whitespace/empty_if_body", + "whitespace/empty_loop_body", + "whitespace/end_of_line", + "whitespace/ending_newline", + "whitespace/forcolon", + "whitespace/indent", + "whitespace/indent_namespace", + "whitespace/line_length", + "whitespace/newline", + "whitespace/operators", + "whitespace/parens", + 
"whitespace/semicolon", + "whitespace/tab", + "whitespace/todo", +] # keywords to use with --outputs which generate stdout for machine processing -_MACHINE_OUTPUTS = [ - 'junit', - 'sed', - 'gsed' -] +_MACHINE_OUTPUTS = ["junit", "sed", "gsed"] # These error categories are no longer enforced by cpplint, but for backwards- # compatibility they may still appear in NOLINT comments. _LEGACY_ERROR_CATEGORIES = [ - 'build/class', - 'readability/streams', - 'readability/function', - ] + "build/class", + "readability/streams", + "readability/function", +] # These prefixes for categories should be ignored since they relate to other # tools which also use the NOLINT syntax, e.g. clang-tidy. _OTHER_NOLINT_CATEGORY_PREFIXES = [ - 'clang-analyzer-', - 'abseil-', - 'altera-', - 'android-', - 'boost-', - 'bugprone-', - 'cert-', - 'concurrency-', - 'cppcoreguidelines-', - 'darwin-', - 'fuchsia-', - 'google-', - 'hicpp-', - 'linuxkernel-', - 'llvm-', - 'llvmlibc-', - 'misc-', - 'modernize-', - 'mpi-', - 'objc-', - 'openmp-', - 'performance-', - 'portability-', - 'readability-', - 'zircon-', - ] + "clang-analyzer-", + "abseil-", + "altera-", + "android-", + "boost-", + "bugprone-", + "cert-", + "concurrency-", + "cppcoreguidelines-", + "darwin-", + "fuchsia-", + "google-", + "hicpp-", + "linuxkernel-", + "llvm-", + "llvmlibc-", + "misc-", + "modernize-", + "mpi-", + "objc-", + "openmp-", + "performance-", + "portability-", + "readability-", + "zircon-", +] # The default state of the category filter. This is overridden by the --filter= # flag. By default all errors are on, so only add here categories that should be # off by default (i.e., categories that must be enabled by the --filter= flags). # All entries here should start with a '-' or '+', as in the --filter= flag. _DEFAULT_FILTERS = [ - '-build/include_alpha', - '-readability/fn_size', - ] + "-build/include_alpha", + "-readability/fn_size", +] # The default list of categories suppressed for C (not C++) files. 
_DEFAULT_C_SUPPRESSED_CATEGORIES = [ - 'readability/casting', - ] + "readability/casting", +] # The default list of categories suppressed for Linux Kernel files. _DEFAULT_KERNEL_SUPPRESSED_CATEGORIES = [ - 'whitespace/tab', - ] + "whitespace/tab", +] # We used to check for high-bit characters, but after much discussion we # decided those were OK, as long as they were in UTF-8 and didn't represent # hard-coded international strings, which belong in a separate i18n file. # C++ headers -_CPP_HEADERS = frozenset([ - # Legacy - 'algobase.h', - 'algo.h', - 'alloc.h', - 'builtinbuf.h', - 'bvector.h', - # 'complex.h', collides with System C header "complex.h" since C11 - 'defalloc.h', - 'deque.h', - 'editbuf.h', - 'fstream.h', - 'function.h', - 'hash_map', - 'hash_map.h', - 'hash_set', - 'hash_set.h', - 'hashtable.h', - 'heap.h', - 'indstream.h', - 'iomanip.h', - 'iostream.h', - 'istream.h', - 'iterator.h', - 'list.h', - 'map.h', - 'multimap.h', - 'multiset.h', - 'ostream.h', - 'pair.h', - 'parsestream.h', - 'pfstream.h', - 'procbuf.h', - 'pthread_alloc', - 'pthread_alloc.h', - 'rope', - 'rope.h', - 'ropeimpl.h', - 'set.h', - 'slist', - 'slist.h', - 'stack.h', - 'stdiostream.h', - 'stl_alloc.h', - 'stl_relops.h', - 'streambuf.h', - 'stream.h', - 'strfile.h', - 'strstream.h', - 'tempbuf.h', - 'tree.h', - 'type_traits.h', - 'vector.h', - # C++ library headers - 'algorithm', - 'array', - 'atomic', - 'bitset', - 'chrono', - 'codecvt', - 'complex', - 'condition_variable', - 'deque', - 'exception', - 'forward_list', - 'fstream', - 'functional', - 'future', - 'initializer_list', - 'iomanip', - 'ios', - 'iosfwd', - 'iostream', - 'istream', - 'iterator', - 'limits', - 'list', - 'locale', - 'map', - 'memory', - 'mutex', - 'new', - 'numeric', - 'ostream', - 'queue', - 'random', - 'ratio', - 'regex', - 'scoped_allocator', - 'set', - 'sstream', - 'stack', - 'stdexcept', - 'streambuf', - 'string', - 'strstream', - 'system_error', - 'thread', - 'tuple', - 'typeindex', - 'typeinfo', - 
'type_traits', - 'unordered_map', - 'unordered_set', - 'utility', - 'valarray', - 'vector', - # C++14 headers - 'shared_mutex', - # C++17 headers - 'any', - 'charconv', - 'codecvt', - 'execution', - 'filesystem', - 'memory_resource', - 'optional', - 'string_view', - 'variant', - # C++20 headers - 'barrier', - 'bit', - 'compare', - 'concepts', - 'coroutine', - 'format', - 'latch' - 'numbers', - 'ranges', - 'semaphore', - 'source_location', - 'span', - 'stop_token', - 'syncstream', - 'version', - # C++23 headers - 'expected', - 'flat_map', - 'flat_set', - 'generator', - 'mdspan', - 'print', - 'spanstream', - 'stacktrace', - 'stdfloat', - # C++ headers for C library facilities - 'cassert', - 'ccomplex', - 'cctype', - 'cerrno', - 'cfenv', - 'cfloat', - 'cinttypes', - 'ciso646', - 'climits', - 'clocale', - 'cmath', - 'csetjmp', - 'csignal', - 'cstdalign', - 'cstdarg', - 'cstdbool', - 'cstddef', - 'cstdint', - 'cstdio', - 'cstdlib', - 'cstring', - 'ctgmath', - 'ctime', - 'cuchar', - 'cwchar', - 'cwctype', - ]) +_CPP_HEADERS = frozenset( + [ + # Legacy + "algobase.h", + "algo.h", + "alloc.h", + "builtinbuf.h", + "bvector.h", + # 'complex.h', collides with System C header "complex.h" since C11 + "defalloc.h", + "deque.h", + "editbuf.h", + "fstream.h", + "function.h", + "hash_map", + "hash_map.h", + "hash_set", + "hash_set.h", + "hashtable.h", + "heap.h", + "indstream.h", + "iomanip.h", + "iostream.h", + "istream.h", + "iterator.h", + "list.h", + "map.h", + "multimap.h", + "multiset.h", + "ostream.h", + "pair.h", + "parsestream.h", + "pfstream.h", + "procbuf.h", + "pthread_alloc", + "pthread_alloc.h", + "rope", + "rope.h", + "ropeimpl.h", + "set.h", + "slist", + "slist.h", + "stack.h", + "stdiostream.h", + "stl_alloc.h", + "stl_relops.h", + "streambuf.h", + "stream.h", + "strfile.h", + "strstream.h", + "tempbuf.h", + "tree.h", + "type_traits.h", + "vector.h", + # C++ library headers + "algorithm", + "array", + "atomic", + "bitset", + "chrono", + "codecvt", + "complex", + 
"condition_variable", + "deque", + "exception", + "forward_list", + "fstream", + "functional", + "future", + "initializer_list", + "iomanip", + "ios", + "iosfwd", + "iostream", + "istream", + "iterator", + "limits", + "list", + "locale", + "map", + "memory", + "mutex", + "new", + "numeric", + "ostream", + "queue", + "random", + "ratio", + "regex", + "scoped_allocator", + "set", + "sstream", + "stack", + "stdexcept", + "streambuf", + "string", + "strstream", + "system_error", + "thread", + "tuple", + "typeindex", + "typeinfo", + "type_traits", + "unordered_map", + "unordered_set", + "utility", + "valarray", + "vector", + # C++14 headers + "shared_mutex", + # C++17 headers + "any", + "charconv", + "codecvt", + "execution", + "filesystem", + "memory_resource", + "optional", + "string_view", + "variant", + # C++20 headers + "barrier", + "bit", + "compare", + "concepts", + "coroutine", + "format", + "latch" "numbers", + "ranges", + "semaphore", + "source_location", + "span", + "stop_token", + "syncstream", + "version", + # C++23 headers + "expected", + "flat_map", + "flat_set", + "generator", + "mdspan", + "print", + "spanstream", + "stacktrace", + "stdfloat", + # C++ headers for C library facilities + "cassert", + "ccomplex", + "cctype", + "cerrno", + "cfenv", + "cfloat", + "cinttypes", + "ciso646", + "climits", + "clocale", + "cmath", + "csetjmp", + "csignal", + "cstdalign", + "cstdarg", + "cstdbool", + "cstddef", + "cstdint", + "cstdio", + "cstdlib", + "cstring", + "ctgmath", + "ctime", + "cuchar", + "cwchar", + "cwctype", + ] +) # C headers -_C_HEADERS = frozenset([ - # System C headers - 'assert.h', - 'complex.h', - 'ctype.h', - 'errno.h', - 'fenv.h', - 'float.h', - 'inttypes.h', - 'iso646.h', - 'limits.h', - 'locale.h', - 'math.h', - 'setjmp.h', - 'signal.h', - 'stdalign.h', - 'stdarg.h', - 'stdatomic.h', - 'stdbool.h', - 'stddef.h', - 'stdint.h', - 'stdio.h', - 'stdlib.h', - 'stdnoreturn.h', - 'string.h', - 'tgmath.h', - 'threads.h', - 'time.h', - 'uchar.h', - 
'wchar.h', - 'wctype.h', - # C23 headers - 'stdbit.h', - 'stdckdint.h', - # additional POSIX C headers - 'aio.h', - 'arpa/inet.h', - 'cpio.h', - 'dirent.h', - 'dlfcn.h', - 'fcntl.h', - 'fmtmsg.h', - 'fnmatch.h', - 'ftw.h', - 'glob.h', - 'grp.h', - 'iconv.h', - 'langinfo.h', - 'libgen.h', - 'monetary.h', - 'mqueue.h', - 'ndbm.h', - 'net/if.h', - 'netdb.h', - 'netinet/in.h', - 'netinet/tcp.h', - 'nl_types.h', - 'poll.h', - 'pthread.h', - 'pwd.h', - 'regex.h', - 'sched.h', - 'search.h', - 'semaphore.h', - 'setjmp.h', - 'signal.h', - 'spawn.h', - 'strings.h', - 'stropts.h', - 'syslog.h', - 'tar.h', - 'termios.h', - 'trace.h', - 'ulimit.h', - 'unistd.h', - 'utime.h', - 'utmpx.h', - 'wordexp.h', - # additional GNUlib headers - 'a.out.h', - 'aliases.h', - 'alloca.h', - 'ar.h', - 'argp.h', - 'argz.h', - 'byteswap.h', - 'crypt.h', - 'endian.h', - 'envz.h', - 'err.h', - 'error.h', - 'execinfo.h', - 'fpu_control.h', - 'fstab.h', - 'fts.h', - 'getopt.h', - 'gshadow.h', - 'ieee754.h', - 'ifaddrs.h', - 'libintl.h', - 'mcheck.h', - 'mntent.h', - 'obstack.h', - 'paths.h', - 'printf.h', - 'pty.h', - 'resolv.h', - 'shadow.h', - 'sysexits.h', - 'ttyent.h', - # Additional linux glibc headers - 'dlfcn.h', - 'elf.h', - 'features.h', - 'gconv.h', - 'gnu-versions.h', - 'lastlog.h', - 'libio.h', - 'link.h', - 'malloc.h', - 'memory.h', - 'netash/ash.h', - 'netatalk/at.h', - 'netax25/ax25.h', - 'neteconet/ec.h', - 'netipx/ipx.h', - 'netiucv/iucv.h', - 'netpacket/packet.h', - 'netrom/netrom.h', - 'netrose/rose.h', - 'nfs/nfs.h', - 'nl_types.h', - 'nss.h', - 're_comp.h', - 'regexp.h', - 'sched.h', - 'sgtty.h', - 'stab.h', - 'stdc-predef.h', - 'stdio_ext.h', - 'syscall.h', - 'termio.h', - 'thread_db.h', - 'ucontext.h', - 'ustat.h', - 'utmp.h', - 'values.h', - 'wait.h', - 'xlocale.h', - # Hardware specific headers - 'arm_neon.h', - 'emmintrin.h', - 'xmmintin.h', - ]) +_C_HEADERS = frozenset( + [ + # System C headers + "assert.h", + "complex.h", + "ctype.h", + "errno.h", + "fenv.h", + "float.h", 
+ "inttypes.h", + "iso646.h", + "limits.h", + "locale.h", + "math.h", + "setjmp.h", + "signal.h", + "stdalign.h", + "stdarg.h", + "stdatomic.h", + "stdbool.h", + "stddef.h", + "stdint.h", + "stdio.h", + "stdlib.h", + "stdnoreturn.h", + "string.h", + "tgmath.h", + "threads.h", + "time.h", + "uchar.h", + "wchar.h", + "wctype.h", + # C23 headers + "stdbit.h", + "stdckdint.h", + # additional POSIX C headers + "aio.h", + "arpa/inet.h", + "cpio.h", + "dirent.h", + "dlfcn.h", + "fcntl.h", + "fmtmsg.h", + "fnmatch.h", + "ftw.h", + "glob.h", + "grp.h", + "iconv.h", + "langinfo.h", + "libgen.h", + "monetary.h", + "mqueue.h", + "ndbm.h", + "net/if.h", + "netdb.h", + "netinet/in.h", + "netinet/tcp.h", + "nl_types.h", + "poll.h", + "pthread.h", + "pwd.h", + "regex.h", + "sched.h", + "search.h", + "semaphore.h", + "setjmp.h", + "signal.h", + "spawn.h", + "strings.h", + "stropts.h", + "syslog.h", + "tar.h", + "termios.h", + "trace.h", + "ulimit.h", + "unistd.h", + "utime.h", + "utmpx.h", + "wordexp.h", + # additional GNUlib headers + "a.out.h", + "aliases.h", + "alloca.h", + "ar.h", + "argp.h", + "argz.h", + "byteswap.h", + "crypt.h", + "endian.h", + "envz.h", + "err.h", + "error.h", + "execinfo.h", + "fpu_control.h", + "fstab.h", + "fts.h", + "getopt.h", + "gshadow.h", + "ieee754.h", + "ifaddrs.h", + "libintl.h", + "mcheck.h", + "mntent.h", + "obstack.h", + "paths.h", + "printf.h", + "pty.h", + "resolv.h", + "shadow.h", + "sysexits.h", + "ttyent.h", + # Additional linux glibc headers + "dlfcn.h", + "elf.h", + "features.h", + "gconv.h", + "gnu-versions.h", + "lastlog.h", + "libio.h", + "link.h", + "malloc.h", + "memory.h", + "netash/ash.h", + "netatalk/at.h", + "netax25/ax25.h", + "neteconet/ec.h", + "netipx/ipx.h", + "netiucv/iucv.h", + "netpacket/packet.h", + "netrom/netrom.h", + "netrose/rose.h", + "nfs/nfs.h", + "nl_types.h", + "nss.h", + "re_comp.h", + "regexp.h", + "sched.h", + "sgtty.h", + "stab.h", + "stdc-predef.h", + "stdio_ext.h", + "syscall.h", + "termio.h", + 
"thread_db.h", + "ucontext.h", + "ustat.h", + "utmp.h", + "values.h", + "wait.h", + "xlocale.h", + # Hardware specific headers + "arm_neon.h", + "emmintrin.h", + "xmmintin.h", + ] +) # Folders of C libraries so commonly used in C++, # that they have parity with standard C libraries. -C_STANDARD_HEADER_FOLDERS = frozenset([ - # standard C library - "sys", - # glibc for linux - "arpa", - "asm-generic", - "bits", - "gnu", - "net", - "netinet", - "protocols", - "rpc", - "rpcsvc", - "scsi", - # linux kernel header - "drm", - "linux", - "misc", - "mtd", - "rdma", - "sound", - "video", - "xen", - ]) +C_STANDARD_HEADER_FOLDERS = frozenset( + [ + # standard C library + "sys", + # glibc for linux + "arpa", + "asm-generic", + "bits", + "gnu", + "net", + "netinet", + "protocols", + "rpc", + "rpcsvc", + "scsi", + # linux kernel header + "drm", + "linux", + "misc", + "mtd", + "rdma", + "sound", + "video", + "xen", + ] +) # Type names _TYPES = re.compile( - r'^(?:' + r"^(?:" # [dcl.type.simple] - r'(char(16_t|32_t)?)|wchar_t|' - r'bool|short|int|long|signed|unsigned|float|double|' + r"(char(16_t|32_t)?)|wchar_t|" + r"bool|short|int|long|signed|unsigned|float|double|" # [support.types] - r'(ptrdiff_t|size_t|max_align_t|nullptr_t)|' + r"(ptrdiff_t|size_t|max_align_t|nullptr_t)|" # [cstdint.syn] - r'(u?int(_fast|_least)?(8|16|32|64)_t)|' - r'(u?int(max|ptr)_t)|' - r')$') + r"(u?int(_fast|_least)?(8|16|32|64)_t)|" + r"(u?int(max|ptr)_t)|" + r")$" +) # These headers are excluded from [build/include] and [build/include_order] @@ -808,39 +810,53 @@ # uppercase character, such as Python.h or nsStringAPI.h, for example). # - Lua headers. 
_THIRD_PARTY_HEADERS_PATTERN = re.compile( - r'^(?:[^/]*[A-Z][^/]*\.h|lua\.h|lauxlib\.h|lualib\.h)$') + r"^(?:[^/]*[A-Z][^/]*\.h|lua\.h|lauxlib\.h|lualib\.h)$" +) # Pattern for matching FileInfo.BaseName() against test file name -_test_suffixes = ['_test', '_regtest', '_unittest'] -_TEST_FILE_SUFFIX = '(' + '|'.join(_test_suffixes) + r')$' +_test_suffixes = ["_test", "_regtest", "_unittest"] +_TEST_FILE_SUFFIX = "(" + "|".join(_test_suffixes) + r")$" # Pattern that matches only complete whitespace, possibly across multiple lines. -_EMPTY_CONDITIONAL_BODY_PATTERN = re.compile(r'^\s*$', re.DOTALL) +_EMPTY_CONDITIONAL_BODY_PATTERN = re.compile(r"^\s*$", re.DOTALL) # Assertion macros. These are defined in base/logging.h and # testing/base/public/gunit.h. _CHECK_MACROS = [ - 'DCHECK', 'CHECK', - 'EXPECT_TRUE', 'ASSERT_TRUE', - 'EXPECT_FALSE', 'ASSERT_FALSE', - ] + "DCHECK", + "CHECK", + "EXPECT_TRUE", + "ASSERT_TRUE", + "EXPECT_FALSE", + "ASSERT_FALSE", +] # Replacement macros for CHECK/DCHECK/EXPECT_TRUE/EXPECT_FALSE _CHECK_REPLACEMENT = dict([(macro_var, {}) for macro_var in _CHECK_MACROS]) -for op, replacement in [('==', 'EQ'), ('!=', 'NE'), - ('>=', 'GE'), ('>', 'GT'), - ('<=', 'LE'), ('<', 'LT')]: - _CHECK_REPLACEMENT['DCHECK'][op] = f'DCHECK_{replacement}' - _CHECK_REPLACEMENT['CHECK'][op] = f'CHECK_{replacement}' - _CHECK_REPLACEMENT['EXPECT_TRUE'][op] = f'EXPECT_{replacement}' - _CHECK_REPLACEMENT['ASSERT_TRUE'][op] = f'ASSERT_{replacement}' - -for op, inv_replacement in [('==', 'NE'), ('!=', 'EQ'), - ('>=', 'LT'), ('>', 'LE'), - ('<=', 'GT'), ('<', 'GE')]: - _CHECK_REPLACEMENT['EXPECT_FALSE'][op] = f'EXPECT_{inv_replacement}' - _CHECK_REPLACEMENT['ASSERT_FALSE'][op] = f'ASSERT_{inv_replacement}' +for op, replacement in [ + ("==", "EQ"), + ("!=", "NE"), + (">=", "GE"), + (">", "GT"), + ("<=", "LE"), + ("<", "LT"), +]: + _CHECK_REPLACEMENT["DCHECK"][op] = f"DCHECK_{replacement}" + _CHECK_REPLACEMENT["CHECK"][op] = f"CHECK_{replacement}" + 
_CHECK_REPLACEMENT["EXPECT_TRUE"][op] = f"EXPECT_{replacement}" + _CHECK_REPLACEMENT["ASSERT_TRUE"][op] = f"ASSERT_{replacement}" + +for op, inv_replacement in [ + ("==", "NE"), + ("!=", "EQ"), + (">=", "LT"), + (">", "LE"), + ("<=", "GT"), + ("<", "GE"), +]: + _CHECK_REPLACEMENT["EXPECT_FALSE"][op] = f"EXPECT_{inv_replacement}" + _CHECK_REPLACEMENT["ASSERT_FALSE"][op] = f"ASSERT_{inv_replacement}" # Alternative tokens and their replacements. For full list, see section 2.5 # Alternative tokens [lex.digraph] in the C++ standard. @@ -848,18 +864,18 @@ # Digraphs (such as '%:') are not included here since it's a mess to # match those on a word boundary. _ALT_TOKEN_REPLACEMENT = { - 'and': '&&', - 'bitor': '|', - 'or': '||', - 'xor': '^', - 'compl': '~', - 'bitand': '&', - 'and_eq': '&=', - 'or_eq': '|=', - 'xor_eq': '^=', - 'not': '!', - 'not_eq': '!=' - } + "and": "&&", + "bitor": "|", + "or": "||", + "xor": "^", + "compl": "~", + "bitand": "&", + "and_eq": "&=", + "or_eq": "|=", + "xor_eq": "^=", + "not": "!", + "not_eq": "!=", +} # Compile regular expression that matches all the above keywords. The "[ =()]" # bit is meant to avoid matching these keywords outside of boolean expressions. @@ -867,7 +883,8 @@ # False positives include C-style multi-line comments and multi-line strings # but those have always been troublesome for cpplint. 
_ALT_TOKEN_REPLACEMENT_PATTERN = re.compile( - r'([ =()])(' + ('|'.join(_ALT_TOKEN_REPLACEMENT.keys())) + r')([ (]|$)') + r"([ =()])(" + ("|".join(_ALT_TOKEN_REPLACEMENT.keys())) + r")([ (]|$)" +) # These constants define types of headers for use with @@ -880,37 +897,38 @@ _OTHER_HEADER = 6 # These constants define the current inline assembly state -_NO_ASM = 0 # Outside of inline assembly block -_INSIDE_ASM = 1 # Inside inline assembly block -_END_ASM = 2 # Last line of inline assembly block -_BLOCK_ASM = 3 # The whole block is an inline assembly block +_NO_ASM = 0 # Outside of inline assembly block +_INSIDE_ASM = 1 # Inside inline assembly block +_END_ASM = 2 # Last line of inline assembly block +_BLOCK_ASM = 3 # The whole block is an inline assembly block # Match start of assembly blocks -_MATCH_ASM = re.compile(r'^\s*(?:asm|_asm|__asm|__asm__)' - r'(?:\s+(volatile|__volatile__))?' - r'\s*[{(]') +_MATCH_ASM = re.compile( + r"^\s*(?:asm|_asm|__asm|__asm__)" r"(?:\s+(volatile|__volatile__))?" r"\s*[{(]" +) # Match strings that indicate we're working on a C (not C++) file. -_SEARCH_C_FILE = re.compile(r'\b(?:LINT_C_FILE|' - r'vim?:\s*.*(\s*|:)filetype=c(\s*|:|$))') +_SEARCH_C_FILE = re.compile( + r"\b(?:LINT_C_FILE|" r"vim?:\s*.*(\s*|:)filetype=c(\s*|:|$))" +) # Match string that indicates we're working on a Linux Kernel file. 
-_SEARCH_KERNEL_FILE = re.compile(r'\b(?:LINT_KERNEL_FILE)') +_SEARCH_KERNEL_FILE = re.compile(r"\b(?:LINT_KERNEL_FILE)") # Commands for sed to fix the problem _SED_FIXUPS = { - 'Remove spaces around =': r's/ = /=/', - 'Remove spaces around !=': r's/ != /!=/', - 'Remove space before ( in if (': r's/if (/if(/', - 'Remove space before ( in for (': r's/for (/for(/', - 'Remove space before ( in while (': r's/while (/while(/', - 'Remove space before ( in switch (': r's/switch (/switch(/', - 'Should have a space between // and comment': r's/\/\//\/\/ /', - 'Missing space before {': r's/\([^ ]\){/\1 {/', - 'Tab found, replace by spaces': r's/\t/ /g', - 'Line ends in whitespace. Consider deleting these extra spaces.': r's/\s*$//', - 'You don\'t need a ; after a }': r's/};/}/', - 'Missing space after ,': r's/,\([^ ]\)/, \1/g', + "Remove spaces around =": r"s/ = /=/", + "Remove spaces around !=": r"s/ != /!=/", + "Remove space before ( in if (": r"s/if (/if(/", + "Remove space before ( in for (": r"s/for (/for(/", + "Remove space before ( in while (": r"s/while (/while(/", + "Remove space before ( in switch (": r"s/switch (/switch(/", + "Should have a space between // and comment": r"s/\/\//\/\/ /", + "Missing space before {": r"s/\([^ ]\){/\1 {/", + "Tab found, replace by spaces": r"s/\t/ /g", + "Line ends in whitespace. Consider deleting these extra spaces.": r"s/\s*$//", + "You don't need a ; after a }": r"s/};/}/", + "Missing space after ,": r"s/,\([^ ]\)/, \1/g", } # {str, set(int)}: a map from error categories to sets of linenumbers @@ -947,886 +965,962 @@ # This is set by --headers flag. 
_hpp_headers = set([]) + class ErrorSuppressions: - """Class to track all error suppressions for cpplint""" - - class LineRange: - """Class to represent a range of line numbers for which an error is suppressed""" - def __init__(self, begin, end): - self.begin = begin - self.end = end - - def __str__(self): - return f'[{self.begin}-{self.end}]' - - def __contains__(self, obj): - return self.begin <= obj <= self.end - - def ContainsRange(self, other): - return self.begin <= other.begin and self.end >= other.end - - def __init__(self): - self._suppressions = collections.defaultdict(list) - self._open_block_suppression = None - - def _AddSuppression(self, category, line_range): - suppressed = self._suppressions[category] - if not (suppressed and suppressed[-1].ContainsRange(line_range)): - suppressed.append(line_range) - - def GetOpenBlockStart(self): - """:return: The start of the current open block or `-1` if there is not an open block""" - return self._open_block_suppression.begin if self._open_block_suppression else -1 - - def AddGlobalSuppression(self, category): - """Add a suppression for `category` which is suppressed for the whole file""" - self._AddSuppression(category, self.LineRange(0, math.inf)) - - def AddLineSuppression(self, category, linenum): - """Add a suppression for `category` which is suppressed only on `linenum`""" - self._AddSuppression(category, self.LineRange(linenum, linenum)) - - def StartBlockSuppression(self, category, linenum): - """Start a suppression block for `category` on `linenum`. inclusive""" - if self._open_block_suppression is None: - self._open_block_suppression = self.LineRange(linenum, math.inf) - self._AddSuppression(category, self._open_block_suppression) - - def EndBlockSuppression(self, linenum): - """End the current block suppression on `linenum`. 
inclusive""" - if self._open_block_suppression: - self._open_block_suppression.end = linenum - self._open_block_suppression = None - - def IsSuppressed(self, category, linenum): - """:return: `True` if `category` is suppressed for `linenum`""" - suppressed = self._suppressions[category] + self._suppressions[None] - return any(linenum in lr for lr in suppressed) - - def HasOpenBlock(self): - """:return: `True` if a block suppression was started but not ended""" - return self._open_block_suppression is not None - - def Clear(self): - """Clear all current error suppressions""" - self._suppressions.clear() - self._open_block_suppression = None + """Class to track all error suppressions for cpplint""" + + class LineRange: + """Class to represent a range of line numbers for which an error is suppressed""" + + def __init__(self, begin, end): + self.begin = begin + self.end = end + + def __str__(self): + return f"[{self.begin}-{self.end}]" + + def __contains__(self, obj): + return self.begin <= obj <= self.end + + def ContainsRange(self, other): + return self.begin <= other.begin and self.end >= other.end + + def __init__(self): + self._suppressions = collections.defaultdict(list) + self._open_block_suppression = None + + def _AddSuppression(self, category, line_range): + suppressed = self._suppressions[category] + if not (suppressed and suppressed[-1].ContainsRange(line_range)): + suppressed.append(line_range) + + def GetOpenBlockStart(self): + """:return: The start of the current open block or `-1` if there is not an open block""" + return ( + self._open_block_suppression.begin if self._open_block_suppression else -1 + ) + + def AddGlobalSuppression(self, category): + """Add a suppression for `category` which is suppressed for the whole file""" + self._AddSuppression(category, self.LineRange(0, math.inf)) + + def AddLineSuppression(self, category, linenum): + """Add a suppression for `category` which is suppressed only on `linenum`""" + self._AddSuppression(category, 
self.LineRange(linenum, linenum)) + + def StartBlockSuppression(self, category, linenum): + """Start a suppression block for `category` on `linenum`. inclusive""" + if self._open_block_suppression is None: + self._open_block_suppression = self.LineRange(linenum, math.inf) + self._AddSuppression(category, self._open_block_suppression) + + def EndBlockSuppression(self, linenum): + """End the current block suppression on `linenum`. inclusive""" + if self._open_block_suppression: + self._open_block_suppression.end = linenum + self._open_block_suppression = None + + def IsSuppressed(self, category, linenum): + """:return: `True` if `category` is suppressed for `linenum`""" + suppressed = self._suppressions[category] + self._suppressions[None] + return any(linenum in lr for lr in suppressed) + + def HasOpenBlock(self): + """:return: `True` if a block suppression was started but not ended""" + return self._open_block_suppression is not None + + def Clear(self): + """Clear all current error suppressions""" + self._suppressions.clear() + self._open_block_suppression = None + _error_suppressions = ErrorSuppressions() + def ProcessHppHeadersOption(val): - global _hpp_headers - try: - _hpp_headers = {ext.strip() for ext in val.split(',')} - except ValueError: - PrintUsage('Header extensions must be comma separated list.') + global _hpp_headers + try: + _hpp_headers = {ext.strip() for ext in val.split(",")} + except ValueError: + PrintUsage("Header extensions must be comma separated list.") + def ProcessIncludeOrderOption(val): - if val is None or val == "default": - pass - elif val == "standardcfirst": - global _include_order - _include_order = val - else: - PrintUsage('Invalid includeorder value %s. Expected default|standardcfirst') + if val is None or val == "default": + pass + elif val == "standardcfirst": + global _include_order + _include_order = val + else: + PrintUsage("Invalid includeorder value %s. 
Expected default|standardcfirst") + def IsHeaderExtension(file_extension): - return file_extension in GetHeaderExtensions() + return file_extension in GetHeaderExtensions() + def GetHeaderExtensions(): - if _hpp_headers: - return _hpp_headers - if _valid_extensions: - return {h for h in _valid_extensions if 'h' in h} - return set(['h', 'hh', 'hpp', 'hxx', 'h++', 'cuh']) + if _hpp_headers: + return _hpp_headers + if _valid_extensions: + return {h for h in _valid_extensions if "h" in h} + return set(["h", "hh", "hpp", "hxx", "h++", "cuh"]) + # The allowed extensions for file names # This is set by --extensions flag def GetAllExtensions(): - return GetHeaderExtensions().union(_valid_extensions or set( - ['c', 'cc', 'cpp', 'cxx', 'c++', 'cu'])) - -def ProcessExtensionsOption(val): - global _valid_extensions - try: - extensions = [ext.strip() for ext in val.split(',')] - _valid_extensions = set(extensions) - except ValueError: - PrintUsage('Extensions should be a comma-separated list of values;' - 'for example: extensions=hpp,cpp\n' - f'This could not be parsed: "{val}"') - -def GetNonHeaderExtensions(): - return GetAllExtensions().difference(GetHeaderExtensions()) + return GetHeaderExtensions().union( + _valid_extensions or set(["c", "cc", "cpp", "cxx", "c++", "cu"]) + ) -def ParseNolintSuppressions(filename, raw_line, linenum, error): - """Updates the global list of line error-suppressions. - - Parses any NOLINT comments on the current line, updating the global - error_suppressions store. Reports an error if the NOLINT comment - was malformed. - - Args: - filename: str, the name of the input file. - raw_line: str, the line of input text, with comments. - linenum: int, the number of the current line. - error: function, an error handler. 
- """ - matched = re.search(r'\bNOLINT(NEXTLINE|BEGIN|END)?\b(\([^)]+\))?', raw_line) - if matched: - no_lint_type = matched.group(1) - if no_lint_type == 'NEXTLINE': - def ProcessCategory(category): - _error_suppressions.AddLineSuppression(category, linenum + 1) - elif no_lint_type == 'BEGIN': - if _error_suppressions.HasOpenBlock(): - error(filename, linenum, 'readability/nolint', 5, - f'NONLINT block already defined on line {_error_suppressions.GetOpenBlockStart()}') - - def ProcessCategory(category): - _error_suppressions.StartBlockSuppression(category, linenum) - elif no_lint_type == 'END': - if not _error_suppressions.HasOpenBlock(): - error(filename, linenum, 'readability/nolint', 5, 'Not in a NOLINT block') - - def ProcessCategory(category): - if category is not None: - error(filename, linenum, 'readability/nolint', 5, - f'NOLINT categories not supported in block END: {category}') - _error_suppressions.EndBlockSuppression(linenum) - else: - def ProcessCategory(category): - _error_suppressions.AddLineSuppression(category, linenum) - categories = matched.group(2) - if categories in (None, '(*)'): # => "suppress all" - ProcessCategory(None) - elif categories.startswith('(') and categories.endswith(')'): - for category in set(map(lambda c: c.strip(), categories[1:-1].split(','))): - if category in _ERROR_CATEGORIES: - ProcessCategory(category) - elif any(c for c in _OTHER_NOLINT_CATEGORY_PREFIXES if category.startswith(c)): - # Ignore any categories from other tools. 
- pass - elif category not in _LEGACY_ERROR_CATEGORIES: - error(filename, linenum, 'readability/nolint', 5, - f'Unknown NOLINT error category: {category}') -def ProcessGlobalSuppresions(lines): - """Deprecated; use ProcessGlobalSuppressions.""" - ProcessGlobalSuppressions(lines) +def ProcessExtensionsOption(val): + global _valid_extensions + try: + extensions = [ext.strip() for ext in val.split(",")] + _valid_extensions = set(extensions) + except ValueError: + PrintUsage( + "Extensions should be a comma-separated list of values;" + "for example: extensions=hpp,cpp\n" + f'This could not be parsed: "{val}"' + ) -def ProcessGlobalSuppressions(lines): - """Updates the list of global error suppressions. - Parses any lint directives in the file that have global effect. +def GetNonHeaderExtensions(): + return GetAllExtensions().difference(GetHeaderExtensions()) - Args: - lines: An array of strings, each representing a line of the file, with the - last element being empty if the file is terminated with a newline. - """ - for line in lines: - if _SEARCH_C_FILE.search(line): - for category in _DEFAULT_C_SUPPRESSED_CATEGORIES: - _error_suppressions.AddGlobalSuppression(category) - if _SEARCH_KERNEL_FILE.search(line): - for category in _DEFAULT_KERNEL_SUPPRESSED_CATEGORIES: - _error_suppressions.AddGlobalSuppression(category) +def ParseNolintSuppressions(filename, raw_line, linenum, error): + """Updates the global list of line error-suppressions. -def ResetNolintSuppressions(): - """Resets the set of NOLINT suppressions to empty.""" - _error_suppressions.Clear() + Parses any NOLINT comments on the current line, updating the global + error_suppressions store. Reports an error if the NOLINT comment + was malformed. + Args: + filename: str, the name of the input file. + raw_line: str, the line of input text, with comments. + linenum: int, the number of the current line. + error: function, an error handler. 
+ """ + matched = re.search(r"\bNOLINT(NEXTLINE|BEGIN|END)?\b(\([^)]+\))?", raw_line) + if matched: + no_lint_type = matched.group(1) + if no_lint_type == "NEXTLINE": + + def ProcessCategory(category): + _error_suppressions.AddLineSuppression(category, linenum + 1) + + elif no_lint_type == "BEGIN": + if _error_suppressions.HasOpenBlock(): + error( + filename, + linenum, + "readability/nolint", + 5, + f"NONLINT block already defined on line {_error_suppressions.GetOpenBlockStart()}", + ) + + def ProcessCategory(category): + _error_suppressions.StartBlockSuppression(category, linenum) + + elif no_lint_type == "END": + if not _error_suppressions.HasOpenBlock(): + error( + filename, linenum, "readability/nolint", 5, "Not in a NOLINT block" + ) + + def ProcessCategory(category): + if category is not None: + error( + filename, + linenum, + "readability/nolint", + 5, + f"NOLINT categories not supported in block END: {category}", + ) + _error_suppressions.EndBlockSuppression(linenum) -def IsErrorSuppressedByNolint(category, linenum): - """Returns true if the specified error category is suppressed on this line. + else: - Consults the global error_suppressions map populated by - ParseNolintSuppressions/ProcessGlobalSuppressions/ResetNolintSuppressions. + def ProcessCategory(category): + _error_suppressions.AddLineSuppression(category, linenum) + + categories = matched.group(2) + if categories in (None, "(*)"): # => "suppress all" + ProcessCategory(None) + elif categories.startswith("(") and categories.endswith(")"): + for category in set(map(lambda c: c.strip(), categories[1:-1].split(","))): + if category in _ERROR_CATEGORIES: + ProcessCategory(category) + elif any( + c for c in _OTHER_NOLINT_CATEGORY_PREFIXES if category.startswith(c) + ): + # Ignore any categories from other tools. 
+ pass + elif category not in _LEGACY_ERROR_CATEGORIES: + error( + filename, + linenum, + "readability/nolint", + 5, + f"Unknown NOLINT error category: {category}", + ) - Args: - category: str, the category of the error. - linenum: int, the current line number. - Returns: - bool, True iff the error should be suppressed due to a NOLINT comment, - block suppression or global suppression. - """ - return _error_suppressions.IsSuppressed(category, linenum) +def ProcessGlobalSuppresions(lines): + """Deprecated; use ProcessGlobalSuppressions.""" + ProcessGlobalSuppressions(lines) -def _IsSourceExtension(s): - """File extension (excluding dot) matches a source file extension.""" - return s in GetNonHeaderExtensions() +def ProcessGlobalSuppressions(lines): + """Updates the list of global error suppressions. -class _IncludeState(object): - """Tracks line numbers for includes, and the order in which includes appear. - - include_list contains list of lists of (header, line number) pairs. - It's a lists of lists rather than just one flat list to make it - easier to update across preprocessor boundaries. - - Call CheckNextIncludeOrder() once for each header in the file, passing - in the type constants defined above. Calls in an illegal order will - raise an _IncludeError with an appropriate error message. - - """ - # self._section will move monotonically through this set. If it ever - # needs to move backwards, CheckNextIncludeOrder will raise an error. - _INITIAL_SECTION = 0 - _MY_H_SECTION = 1 - _C_SECTION = 2 - _CPP_SECTION = 3 - _OTHER_SYS_SECTION = 4 - _OTHER_H_SECTION = 5 - - _TYPE_NAMES = { - _C_SYS_HEADER: 'C system header', - _CPP_SYS_HEADER: 'C++ system header', - _OTHER_SYS_HEADER: 'other system header', - _LIKELY_MY_HEADER: 'header this file implements', - _POSSIBLE_MY_HEADER: 'header this file may implement', - _OTHER_HEADER: 'other header', - } - _SECTION_NAMES = { - _INITIAL_SECTION: "... nothing. 
(This can't be an error.)", - _MY_H_SECTION: 'a header this file implements', - _C_SECTION: 'C system header', - _CPP_SECTION: 'C++ system header', - _OTHER_SYS_SECTION: 'other system header', - _OTHER_H_SECTION: 'other header', - } - - def __init__(self): - self.include_list = [[]] - self._section = None - self._last_header = None - self.ResetSection('') - - def FindHeader(self, header): - """Check if a header has already been included. + Parses any lint directives in the file that have global effect. Args: - header: header to check. - Returns: - Line number of previous occurrence, or -1 if the header has not - been seen before. + lines: An array of strings, each representing a line of the file, with the + last element being empty if the file is terminated with a newline. """ - for section_list in self.include_list: - for f in section_list: - if f[0] == header: - return f[1] - return -1 - - def ResetSection(self, directive): - """Reset section checking for preprocessor directive. + for line in lines: + if _SEARCH_C_FILE.search(line): + for category in _DEFAULT_C_SUPPRESSED_CATEGORIES: + _error_suppressions.AddGlobalSuppression(category) + if _SEARCH_KERNEL_FILE.search(line): + for category in _DEFAULT_KERNEL_SUPPRESSED_CATEGORIES: + _error_suppressions.AddGlobalSuppression(category) - Args: - directive: preprocessor directive (e.g. "if", "else"). - """ - # The name of the current section. - self._section = self._INITIAL_SECTION - # The path of last found header. - self._last_header = '' - # Update list of includes. Note that we never pop from the - # include list. 
- if directive in ('if', 'ifdef', 'ifndef'): - self.include_list.append([]) - elif directive in ('else', 'elif'): - self.include_list[-1] = [] +def ResetNolintSuppressions(): + """Resets the set of NOLINT suppressions to empty.""" + _error_suppressions.Clear() - def SetLastHeader(self, header_path): - self._last_header = header_path - def CanonicalizeAlphabeticalOrder(self, header_path): - """Returns a path canonicalized for alphabetical comparison. +def IsErrorSuppressedByNolint(category, linenum): + """Returns true if the specified error category is suppressed on this line. - - replaces "-" with "_" so they both cmp the same. - - removes '-inl' since we don't require them to be after the main header. - - lowercase everything, just in case. + Consults the global error_suppressions map populated by + ParseNolintSuppressions/ProcessGlobalSuppressions/ResetNolintSuppressions. Args: - header_path: Path to be canonicalized. - + category: str, the category of the error. + linenum: int, the current line number. Returns: - Canonicalized path. + bool, True iff the error should be suppressed due to a NOLINT comment, + block suppression or global suppression. """ - return header_path.replace('-inl.h', '.h').replace('-', '_').lower() - - def IsInAlphabeticalOrder(self, clean_lines, linenum, header_path): - """Check if a header is in alphabetical order with the previous header. + return _error_suppressions.IsSuppressed(category, linenum) - Args: - clean_lines: A CleansedLines instance containing the file. - linenum: The number of the line to check. - header_path: Canonicalized header to be checked. - Returns: - Returns true if the header is in alphabetical order. - """ - # If previous section is different from current section, _last_header will - # be reset to empty string, so it's always less than current header. - # - # If previous line was a blank line, assume that the headers are - # intentionally sorted the way they are. 
- if (self._last_header > header_path and - re.match(r'^\s*#\s*include\b', clean_lines.elided[linenum - 1])): - return False - return True +def _IsSourceExtension(s): + """File extension (excluding dot) matches a source file extension.""" + return s in GetNonHeaderExtensions() - def CheckNextIncludeOrder(self, header_type): - """Returns a non-empty error message if the next header is out of order. - This function also updates the internal state to be ready to check - the next include. +class _IncludeState(object): + """Tracks line numbers for includes, and the order in which includes appear. - Args: - header_type: One of the _XXX_HEADER constants defined above. + include_list contains list of lists of (header, line number) pairs. + It's a lists of lists rather than just one flat list to make it + easier to update across preprocessor boundaries. - Returns: - The empty string if the header is in the right order, or an - error message describing what's wrong. + Call CheckNextIncludeOrder() once for each header in the file, passing + in the type constants defined above. Calls in an illegal order will + raise an _IncludeError with an appropriate error message. 
""" - error_message = (f'Found {self._TYPE_NAMES[header_type]}' - f' after {self._SECTION_NAMES[self._section]}') - - last_section = self._section - - if header_type == _C_SYS_HEADER: - if self._section <= self._C_SECTION: - self._section = self._C_SECTION - else: - self._last_header = '' - return error_message - elif header_type == _CPP_SYS_HEADER: - if self._section <= self._CPP_SECTION: - self._section = self._CPP_SECTION - else: - self._last_header = '' - return error_message - elif header_type == _OTHER_SYS_HEADER: - if self._section <= self._OTHER_SYS_SECTION: - self._section = self._OTHER_SYS_SECTION - else: - self._last_header = '' - return error_message - elif header_type == _LIKELY_MY_HEADER: - if self._section <= self._MY_H_SECTION: - self._section = self._MY_H_SECTION - else: - self._section = self._OTHER_H_SECTION - elif header_type == _POSSIBLE_MY_HEADER: - if self._section <= self._MY_H_SECTION: - self._section = self._MY_H_SECTION - else: - # This will always be the fallback because we're not sure - # enough that the header is associated with this file. - self._section = self._OTHER_H_SECTION - else: - assert header_type == _OTHER_HEADER - self._section = self._OTHER_H_SECTION - - if last_section != self._section: - self._last_header = '' - - return '' + # self._section will move monotonically through this set. If it ever + # needs to move backwards, CheckNextIncludeOrder will raise an error. + _INITIAL_SECTION = 0 + _MY_H_SECTION = 1 + _C_SECTION = 2 + _CPP_SECTION = 3 + _OTHER_SYS_SECTION = 4 + _OTHER_H_SECTION = 5 + + _TYPE_NAMES = { + _C_SYS_HEADER: "C system header", + _CPP_SYS_HEADER: "C++ system header", + _OTHER_SYS_HEADER: "other system header", + _LIKELY_MY_HEADER: "header this file implements", + _POSSIBLE_MY_HEADER: "header this file may implement", + _OTHER_HEADER: "other header", + } + _SECTION_NAMES = { + _INITIAL_SECTION: "... nothing. 
(This can't be an error.)", + _MY_H_SECTION: "a header this file implements", + _C_SECTION: "C system header", + _CPP_SECTION: "C++ system header", + _OTHER_SYS_SECTION: "other system header", + _OTHER_H_SECTION: "other header", + } -class _CppLintState(object): - """Maintains module-wide state..""" - - def __init__(self): - self.verbose_level = 1 # global setting. - self.error_count = 0 # global count of reported errors - # filters to apply when emitting error messages - self.filters = _DEFAULT_FILTERS[:] - # backup of filter list. Used to restore the state after each file. - self._filters_backup = self.filters[:] - self.counting = 'total' # In what way are we counting errors? - self.errors_by_category = {} # string to int dict storing error counts - self.quiet = False # Suppress non-error messages? - - # output format: - # "emacs" - format that emacs can parse (default) - # "eclipse" - format that eclipse can parse - # "vs7" - format that Microsoft Visual Studio 7 can parse - # "junit" - format that Jenkins, Bamboo, etc can parse - # "sed" - returns a gnu sed command to fix the problem - # "gsed" - like sed, but names the command gsed, e.g. 
for macOS homebrew users - self.output_format = 'emacs' - - # For JUnit output, save errors and failures until the end so that they - # can be written into the XML - self._junit_errors = [] - self._junit_failures = [] - - def SetOutputFormat(self, output_format): - """Sets the output format for errors.""" - self.output_format = output_format - - def SetQuiet(self, quiet): - """Sets the module's quiet settings, and returns the previous setting.""" - last_quiet = self.quiet - self.quiet = quiet - return last_quiet - - def SetVerboseLevel(self, level): - """Sets the module's verbosity, and returns the previous setting.""" - last_verbose_level = self.verbose_level - self.verbose_level = level - return last_verbose_level + def __init__(self): + self.include_list = [[]] + self._section = None + self._last_header = None + self.ResetSection("") + + def FindHeader(self, header): + """Check if a header has already been included. + + Args: + header: header to check. + Returns: + Line number of previous occurrence, or -1 if the header has not + been seen before. + """ + for section_list in self.include_list: + for f in section_list: + if f[0] == header: + return f[1] + return -1 + + def ResetSection(self, directive): + """Reset section checking for preprocessor directive. + + Args: + directive: preprocessor directive (e.g. "if", "else"). + """ + # The name of the current section. + self._section = self._INITIAL_SECTION + # The path of last found header. + self._last_header = "" + + # Update list of includes. Note that we never pop from the + # include list. + if directive in ("if", "ifdef", "ifndef"): + self.include_list.append([]) + elif directive in ("else", "elif"): + self.include_list[-1] = [] + + def SetLastHeader(self, header_path): + self._last_header = header_path + + def CanonicalizeAlphabeticalOrder(self, header_path): + """Returns a path canonicalized for alphabetical comparison. + + - replaces "-" with "_" so they both cmp the same. 
+ - removes '-inl' since we don't require them to be after the main header. + - lowercase everything, just in case. + + Args: + header_path: Path to be canonicalized. + + Returns: + Canonicalized path. + """ + return header_path.replace("-inl.h", ".h").replace("-", "_").lower() + + def IsInAlphabeticalOrder(self, clean_lines, linenum, header_path): + """Check if a header is in alphabetical order with the previous header. + + Args: + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + header_path: Canonicalized header to be checked. + + Returns: + Returns true if the header is in alphabetical order. + """ + # If previous section is different from current section, _last_header will + # be reset to empty string, so it's always less than current header. + # + # If previous line was a blank line, assume that the headers are + # intentionally sorted the way they are. + if self._last_header > header_path and re.match( + r"^\s*#\s*include\b", clean_lines.elided[linenum - 1] + ): + return False + return True - def SetCountingStyle(self, counting_style): - """Sets the module's counting options.""" - self.counting = counting_style + def CheckNextIncludeOrder(self, header_type): + """Returns a non-empty error message if the next header is out of order. + + This function also updates the internal state to be ready to check + the next include. + + Args: + header_type: One of the _XXX_HEADER constants defined above. + + Returns: + The empty string if the header is in the right order, or an + error message describing what's wrong. 
+ + """ + error_message = ( + f"Found {self._TYPE_NAMES[header_type]}" + f" after {self._SECTION_NAMES[self._section]}" + ) + + last_section = self._section + + if header_type == _C_SYS_HEADER: + if self._section <= self._C_SECTION: + self._section = self._C_SECTION + else: + self._last_header = "" + return error_message + elif header_type == _CPP_SYS_HEADER: + if self._section <= self._CPP_SECTION: + self._section = self._CPP_SECTION + else: + self._last_header = "" + return error_message + elif header_type == _OTHER_SYS_HEADER: + if self._section <= self._OTHER_SYS_SECTION: + self._section = self._OTHER_SYS_SECTION + else: + self._last_header = "" + return error_message + elif header_type == _LIKELY_MY_HEADER: + if self._section <= self._MY_H_SECTION: + self._section = self._MY_H_SECTION + else: + self._section = self._OTHER_H_SECTION + elif header_type == _POSSIBLE_MY_HEADER: + if self._section <= self._MY_H_SECTION: + self._section = self._MY_H_SECTION + else: + # This will always be the fallback because we're not sure + # enough that the header is associated with this file. + self._section = self._OTHER_H_SECTION + else: + assert header_type == _OTHER_HEADER + self._section = self._OTHER_H_SECTION - def SetFilters(self, filters): - """Sets the error-message filters. + if last_section != self._section: + self._last_header = "" - These filters are applied when deciding whether to emit a given - error message. + return "" - Args: - filters: A string of comma-separated filters (eg "+whitespace/indent"). - Each filter should start with + or -; else we die. - Raises: - ValueError: The comma-separated filters did not all start with '+' or '-'. - E.g. "-,+whitespace,-whitespace/indent,whitespace/badfilter" - """ - # Default filters always have less priority than the flag ones. - self.filters = _DEFAULT_FILTERS[:] - self.AddFilters(filters) - - def AddFilters(self, filters): - """ Adds more filters to the existing list of error-message filters. 
""" - for filt in filters.split(','): - clean_filt = filt.strip() - if clean_filt: - self.filters.append(clean_filt) - for filt in self.filters: - if not (filt.startswith('+') or filt.startswith('-')): - raise ValueError('Every filter in --filters must start with + or -' - f' ({filt} does not)') - - def BackupFilters(self): - """ Saves the current filter list to backup storage.""" - self._filters_backup = self.filters[:] - - def RestoreFilters(self): - """ Restores filters previously backed up.""" - self.filters = self._filters_backup[:] - - def ResetErrorCounts(self): - """Sets the module's error statistic back to zero.""" - self.error_count = 0 - self.errors_by_category = {} - - def IncrementErrorCount(self, category): - """Bumps the module's error statistic.""" - self.error_count += 1 - if self.counting in ('toplevel', 'detailed'): - if self.counting != 'detailed': - category = category.split('/')[0] - if category not in self.errors_by_category: - self.errors_by_category[category] = 0 - self.errors_by_category[category] += 1 - - def PrintErrorCounts(self): - """Print a summary of errors by category, and the total.""" - for category, count in sorted(dict.items(self.errors_by_category)): - self.PrintInfo(f'Category \'{category}\' errors found: {count}\n') - if self.error_count > 0: - self.PrintInfo(f'Total errors found: {self.error_count}\n') - - def PrintInfo(self, message): - # _quiet does not represent --quiet flag. - # Hide infos from stdout to keep stdout pure for machine consumption - if not _quiet and self.output_format not in _MACHINE_OUTPUTS: - sys.stdout.write(message) - - def PrintError(self, message): - if self.output_format == 'junit': - self._junit_errors.append(message) - else: - sys.stderr.write(message) +class _CppLintState(object): + """Maintains module-wide state..""" + + def __init__(self): + self.verbose_level = 1 # global setting. 
+ self.error_count = 0 # global count of reported errors + # filters to apply when emitting error messages + self.filters = _DEFAULT_FILTERS[:] + # backup of filter list. Used to restore the state after each file. + self._filters_backup = self.filters[:] + self.counting = "total" # In what way are we counting errors? + self.errors_by_category = {} # string to int dict storing error counts + self.quiet = False # Suppress non-error messages? + + # output format: + # "emacs" - format that emacs can parse (default) + # "eclipse" - format that eclipse can parse + # "vs7" - format that Microsoft Visual Studio 7 can parse + # "junit" - format that Jenkins, Bamboo, etc can parse + # "sed" - returns a gnu sed command to fix the problem + # "gsed" - like sed, but names the command gsed, e.g. for macOS homebrew users + self.output_format = "emacs" + + # For JUnit output, save errors and failures until the end so that they + # can be written into the XML + self._junit_errors = [] + self._junit_failures = [] + + def SetOutputFormat(self, output_format): + """Sets the output format for errors.""" + self.output_format = output_format + + def SetQuiet(self, quiet): + """Sets the module's quiet settings, and returns the previous setting.""" + last_quiet = self.quiet + self.quiet = quiet + return last_quiet + + def SetVerboseLevel(self, level): + """Sets the module's verbosity, and returns the previous setting.""" + last_verbose_level = self.verbose_level + self.verbose_level = level + return last_verbose_level + + def SetCountingStyle(self, counting_style): + """Sets the module's counting options.""" + self.counting = counting_style + + def SetFilters(self, filters): + """Sets the error-message filters. + + These filters are applied when deciding whether to emit a given + error message. + + Args: + filters: A string of comma-separated filters (eg "+whitespace/indent"). + Each filter should start with + or -; else we die. 
+ + Raises: + ValueError: The comma-separated filters did not all start with '+' or '-'. + E.g. "-,+whitespace,-whitespace/indent,whitespace/badfilter" + """ + # Default filters always have less priority than the flag ones. + self.filters = _DEFAULT_FILTERS[:] + self.AddFilters(filters) + + def AddFilters(self, filters): + """Adds more filters to the existing list of error-message filters.""" + for filt in filters.split(","): + clean_filt = filt.strip() + if clean_filt: + self.filters.append(clean_filt) + for filt in self.filters: + if not (filt.startswith("+") or filt.startswith("-")): + raise ValueError( + "Every filter in --filters must start with + or -" + f" ({filt} does not)" + ) + + def BackupFilters(self): + """Saves the current filter list to backup storage.""" + self._filters_backup = self.filters[:] + + def RestoreFilters(self): + """Restores filters previously backed up.""" + self.filters = self._filters_backup[:] + + def ResetErrorCounts(self): + """Sets the module's error statistic back to zero.""" + self.error_count = 0 + self.errors_by_category = {} + + def IncrementErrorCount(self, category): + """Bumps the module's error statistic.""" + self.error_count += 1 + if self.counting in ("toplevel", "detailed"): + if self.counting != "detailed": + category = category.split("/")[0] + if category not in self.errors_by_category: + self.errors_by_category[category] = 0 + self.errors_by_category[category] += 1 + + def PrintErrorCounts(self): + """Print a summary of errors by category, and the total.""" + for category, count in sorted(dict.items(self.errors_by_category)): + self.PrintInfo(f"Category '{category}' errors found: {count}\n") + if self.error_count > 0: + self.PrintInfo(f"Total errors found: {self.error_count}\n") + + def PrintInfo(self, message): + # _quiet does not represent --quiet flag. 
+ # Hide infos from stdout to keep stdout pure for machine consumption + if not _quiet and self.output_format not in _MACHINE_OUTPUTS: + sys.stdout.write(message) + + def PrintError(self, message): + if self.output_format == "junit": + self._junit_errors.append(message) + else: + sys.stderr.write(message) - def AddJUnitFailure(self, filename, linenum, message, category, confidence): - self._junit_failures.append((filename, linenum, message, category, - confidence)) + def AddJUnitFailure(self, filename, linenum, message, category, confidence): + self._junit_failures.append((filename, linenum, message, category, confidence)) - def FormatJUnitXML(self): - num_errors = len(self._junit_errors) - num_failures = len(self._junit_failures) + def FormatJUnitXML(self): + num_errors = len(self._junit_errors) + num_failures = len(self._junit_failures) - testsuite = xml.etree.ElementTree.Element('testsuite') - testsuite.attrib['errors'] = str(num_errors) - testsuite.attrib['failures'] = str(num_failures) - testsuite.attrib['name'] = 'cpplint' + testsuite = xml.etree.ElementTree.Element("testsuite") + testsuite.attrib["errors"] = str(num_errors) + testsuite.attrib["failures"] = str(num_failures) + testsuite.attrib["name"] = "cpplint" - if num_errors == 0 and num_failures == 0: - testsuite.attrib['tests'] = str(1) - xml.etree.ElementTree.SubElement(testsuite, 'testcase', name='passed') + if num_errors == 0 and num_failures == 0: + testsuite.attrib["tests"] = str(1) + xml.etree.ElementTree.SubElement(testsuite, "testcase", name="passed") - else: - testsuite.attrib['tests'] = str(num_errors + num_failures) - if num_errors > 0: - testcase = xml.etree.ElementTree.SubElement(testsuite, 'testcase') - testcase.attrib['name'] = 'errors' - error = xml.etree.ElementTree.SubElement(testcase, 'error') - error.text = '\n'.join(self._junit_errors) - if num_failures > 0: - # Group failures by file - failed_file_order = [] - failures_by_file = {} - for failure in self._junit_failures: - 
failed_file = failure[0] - if failed_file not in failed_file_order: - failed_file_order.append(failed_file) - failures_by_file[failed_file] = [] - failures_by_file[failed_file].append(failure) - # Create a testcase for each file - for failed_file in failed_file_order: - failures = failures_by_file[failed_file] - testcase = xml.etree.ElementTree.SubElement(testsuite, 'testcase') - testcase.attrib['name'] = failed_file - failure = xml.etree.ElementTree.SubElement(testcase, 'failure') - template = '{0}: {1} [{2}] [{3}]' - texts = [template.format(f[1], f[2], f[3], f[4]) for f in failures] - failure.text = '\n'.join(texts) - - xml_decl = '\n' - return xml_decl + xml.etree.ElementTree.tostring(testsuite, 'utf-8').decode('utf-8') + else: + testsuite.attrib["tests"] = str(num_errors + num_failures) + if num_errors > 0: + testcase = xml.etree.ElementTree.SubElement(testsuite, "testcase") + testcase.attrib["name"] = "errors" + error = xml.etree.ElementTree.SubElement(testcase, "error") + error.text = "\n".join(self._junit_errors) + if num_failures > 0: + # Group failures by file + failed_file_order = [] + failures_by_file = {} + for failure in self._junit_failures: + failed_file = failure[0] + if failed_file not in failed_file_order: + failed_file_order.append(failed_file) + failures_by_file[failed_file] = [] + failures_by_file[failed_file].append(failure) + # Create a testcase for each file + for failed_file in failed_file_order: + failures = failures_by_file[failed_file] + testcase = xml.etree.ElementTree.SubElement(testsuite, "testcase") + testcase.attrib["name"] = failed_file + failure = xml.etree.ElementTree.SubElement(testcase, "failure") + template = "{0}: {1} [{2}] [{3}]" + texts = [template.format(f[1], f[2], f[3], f[4]) for f in failures] + failure.text = "\n".join(texts) + + xml_decl = '\n' + return xml_decl + xml.etree.ElementTree.tostring(testsuite, "utf-8").decode( + "utf-8" + ) _cpplint_state = _CppLintState() def _OutputFormat(): - """Gets the module's 
output format.""" - return _cpplint_state.output_format + """Gets the module's output format.""" + return _cpplint_state.output_format def _SetOutputFormat(output_format): - """Sets the module's output format.""" - _cpplint_state.SetOutputFormat(output_format) + """Sets the module's output format.""" + _cpplint_state.SetOutputFormat(output_format) + def _Quiet(): - """Return's the module's quiet setting.""" - return _cpplint_state.quiet + """Return's the module's quiet setting.""" + return _cpplint_state.quiet + def _SetQuiet(quiet): - """Set the module's quiet status, and return previous setting.""" - return _cpplint_state.SetQuiet(quiet) + """Set the module's quiet status, and return previous setting.""" + return _cpplint_state.SetQuiet(quiet) def _VerboseLevel(): - """Returns the module's verbosity setting.""" - return _cpplint_state.verbose_level + """Returns the module's verbosity setting.""" + return _cpplint_state.verbose_level def _SetVerboseLevel(level): - """Sets the module's verbosity, and returns the previous setting.""" - return _cpplint_state.SetVerboseLevel(level) + """Sets the module's verbosity, and returns the previous setting.""" + return _cpplint_state.SetVerboseLevel(level) def _SetCountingStyle(level): - """Sets the module's counting options.""" - _cpplint_state.SetCountingStyle(level) + """Sets the module's counting options.""" + _cpplint_state.SetCountingStyle(level) def _Filters(): - """Returns the module's list of output filters, as a list.""" - return _cpplint_state.filters + """Returns the module's list of output filters, as a list.""" + return _cpplint_state.filters def _SetFilters(filters): - """Sets the module's error-message filters. - - These filters are applied when deciding whether to emit a given - error message. - - Args: - filters: A string of comma-separated filters (eg "whitespace/indent"). - Each filter should start with + or -; else we die. 
- """ - _cpplint_state.SetFilters(filters) - -def _AddFilters(filters): - """Adds more filter overrides. - - Unlike _SetFilters, this function does not reset the current list of filters - available. - - Args: - filters: A string of comma-separated filters (eg "whitespace/indent"). - Each filter should start with + or -; else we die. - """ - _cpplint_state.AddFilters(filters) - -def _BackupFilters(): - """ Saves the current filter list to backup storage.""" - _cpplint_state.BackupFilters() - -def _RestoreFilters(): - """ Restores filters previously backed up.""" - _cpplint_state.RestoreFilters() - -class _FunctionState(object): - """Tracks current function name and the number of lines in its body.""" - - _NORMAL_TRIGGER = 250 # for --v=0, 500 for --v=1, etc. - _TEST_TRIGGER = 400 # about 50% more than _NORMAL_TRIGGER. - - def __init__(self): - self.in_a_function = False - self.lines_in_function = 0 - self.current_function = '' + """Sets the module's error-message filters. - def Begin(self, function_name): - """Start analyzing function body. + These filters are applied when deciding whether to emit a given + error message. Args: - function_name: The name of the function being tracked. + filters: A string of comma-separated filters (eg "whitespace/indent"). + Each filter should start with + or -; else we die. """ - self.in_a_function = True - self.lines_in_function = 0 - self.current_function = function_name + _cpplint_state.SetFilters(filters) + - def Count(self): - """Count line in current function body.""" - if self.in_a_function: - self.lines_in_function += 1 +def _AddFilters(filters): + """Adds more filter overrides. - def Check(self, error, filename, linenum): - """Report if too many lines in function body. + Unlike _SetFilters, this function does not reset the current list of filters + available. Args: - error: The function to call with any errors found. - filename: The name of the current file. - linenum: The number of the line to check. 
+ filters: A string of comma-separated filters (eg "whitespace/indent"). + Each filter should start with + or -; else we die. """ - if not self.in_a_function: - return - - if re.match(r'T(EST|est)', self.current_function): - base_trigger = self._TEST_TRIGGER - else: - base_trigger = self._NORMAL_TRIGGER - trigger = base_trigger * 2**_VerboseLevel() + _cpplint_state.AddFilters(filters) - if self.lines_in_function > trigger: - error_level = int(math.log(self.lines_in_function / base_trigger, 2)) - # 50 => 0, 100 => 1, 200 => 2, 400 => 3, 800 => 4, 1600 => 5, ... - if error_level > 5: - error_level = 5 - error(filename, linenum, 'readability/fn_size', error_level, - 'Small and focused functions are preferred:' - f' {self.current_function} has {self.lines_in_function} non-comment lines' - f' (error triggered by exceeding {trigger} lines).') - def End(self): - """Stop analyzing function body.""" - self.in_a_function = False +def _BackupFilters(): + """Saves the current filter list to backup storage.""" + _cpplint_state.BackupFilters() -class _IncludeError(Exception): - """Indicates a problem with the include order in a file.""" - pass +def _RestoreFilters(): + """Restores filters previously backed up.""" + _cpplint_state.RestoreFilters() -class FileInfo(object): - """Provides utility functions for filenames. +class _FunctionState(object): + """Tracks current function name and the number of lines in its body.""" + + _NORMAL_TRIGGER = 250 # for --v=0, 500 for --v=1, etc. + _TEST_TRIGGER = 400 # about 50% more than _NORMAL_TRIGGER. + + def __init__(self): + self.in_a_function = False + self.lines_in_function = 0 + self.current_function = "" + + def Begin(self, function_name): + """Start analyzing function body. + + Args: + function_name: The name of the function being tracked. 
+ """ + self.in_a_function = True + self.lines_in_function = 0 + self.current_function = function_name + + def Count(self): + """Count line in current function body.""" + if self.in_a_function: + self.lines_in_function += 1 + + def Check(self, error, filename, linenum): + """Report if too many lines in function body. + + Args: + error: The function to call with any errors found. + filename: The name of the current file. + linenum: The number of the line to check. + """ + if not self.in_a_function: + return + + if re.match(r"T(EST|est)", self.current_function): + base_trigger = self._TEST_TRIGGER + else: + base_trigger = self._NORMAL_TRIGGER + trigger = base_trigger * 2 ** _VerboseLevel() + + if self.lines_in_function > trigger: + error_level = int(math.log(self.lines_in_function / base_trigger, 2)) + # 50 => 0, 100 => 1, 200 => 2, 400 => 3, 800 => 4, 1600 => 5, ... + if error_level > 5: + error_level = 5 + error( + filename, + linenum, + "readability/fn_size", + error_level, + "Small and focused functions are preferred:" + f" {self.current_function} has {self.lines_in_function} non-comment lines" + f" (error triggered by exceeding {trigger} lines).", + ) + + def End(self): + """Stop analyzing function body.""" + self.in_a_function = False - FileInfo provides easy access to the components of a file's path - relative to the project root. - """ - def __init__(self, filename): - self._filename = filename +class _IncludeError(Exception): + """Indicates a problem with the include order in a file.""" - def FullName(self): - """Make Windows paths like Unix.""" - return os.path.abspath(self._filename).replace('\\', '/') + pass - def RepositoryName(self): - r"""FullName after removing the local path to the repository. - If we have a real absolute path name here we can try to do something smart: - detecting the root of the checkout and truncating /path/to/checkout from - the name so that we get header guards that don't include things like - "C:\\Documents and Settings\\..." 
or "/home/username/..." in them and thus - people on different computers who have checked the source out to different - locations won't see bogus errors. - """ - fullname = self.FullName() - - if os.path.exists(fullname): - project_dir = os.path.dirname(fullname) - - # If the user specified a repository path, it exists, and the file is - # contained in it, use the specified repository path - if _repository: - repo = FileInfo(_repository).FullName() - root_dir = project_dir - while os.path.exists(root_dir): - # allow case insensitive compare on Windows - if os.path.normcase(root_dir) == os.path.normcase(repo): - return os.path.relpath(fullname, root_dir).replace('\\', '/') - one_up_dir = os.path.dirname(root_dir) - if one_up_dir == root_dir: - break - root_dir = one_up_dir - - if os.path.exists(os.path.join(project_dir, ".svn")): - # If there's a .svn file in the current directory, we recursively look - # up the directory tree for the top of the SVN checkout - root_dir = project_dir - one_up_dir = os.path.dirname(root_dir) - while os.path.exists(os.path.join(one_up_dir, ".svn")): - root_dir = os.path.dirname(root_dir) - one_up_dir = os.path.dirname(one_up_dir) - - prefix = os.path.commonprefix([root_dir, project_dir]) - return fullname[len(prefix) + 1:] - - # Not SVN <= 1.6? Try to find a git, hg, or svn top level directory by - # searching up from the current path. 
- root_dir = current_dir = os.path.dirname(fullname) - while current_dir != os.path.dirname(current_dir): - if (os.path.exists(os.path.join(current_dir, ".git")) or - os.path.exists(os.path.join(current_dir, ".hg")) or - os.path.exists(os.path.join(current_dir, ".svn"))): - root_dir = current_dir - break - current_dir = os.path.dirname(current_dir) - - if (os.path.exists(os.path.join(root_dir, ".git")) or - os.path.exists(os.path.join(root_dir, ".hg")) or - os.path.exists(os.path.join(root_dir, ".svn"))): - prefix = os.path.commonprefix([root_dir, project_dir]) - return fullname[len(prefix) + 1:] - - # Don't know what to do; header guard warnings may be wrong... - return fullname - - def Split(self): - """Splits the file into the directory, basename, and extension. - - For 'chrome/browser/browser.cc', Split() would - return ('chrome/browser', 'browser', '.cc') +class FileInfo(object): + """Provides utility functions for filenames. - Returns: - A tuple of (directory, basename, extension). + FileInfo provides easy access to the components of a file's path + relative to the project root. """ - googlename = self.RepositoryName() - project, rest = os.path.split(googlename) - return (project,) + os.path.splitext(rest) - - def BaseName(self): - """File base name - text after the final slash, before the final period.""" - return self.Split()[1] + def __init__(self, filename): + self._filename = filename + + def FullName(self): + """Make Windows paths like Unix.""" + return os.path.abspath(self._filename).replace("\\", "/") + + def RepositoryName(self): + r"""FullName after removing the local path to the repository. + + If we have a real absolute path name here we can try to do something smart: + detecting the root of the checkout and truncating /path/to/checkout from + the name so that we get header guards that don't include things like + "C:\\Documents and Settings\\..." or "/home/username/..." 
in them and thus + people on different computers who have checked the source out to different + locations won't see bogus errors. + """ + fullname = self.FullName() + + if os.path.exists(fullname): + project_dir = os.path.dirname(fullname) + + # If the user specified a repository path, it exists, and the file is + # contained in it, use the specified repository path + if _repository: + repo = FileInfo(_repository).FullName() + root_dir = project_dir + while os.path.exists(root_dir): + # allow case insensitive compare on Windows + if os.path.normcase(root_dir) == os.path.normcase(repo): + return os.path.relpath(fullname, root_dir).replace("\\", "/") + one_up_dir = os.path.dirname(root_dir) + if one_up_dir == root_dir: + break + root_dir = one_up_dir + + if os.path.exists(os.path.join(project_dir, ".svn")): + # If there's a .svn file in the current directory, we recursively look + # up the directory tree for the top of the SVN checkout + root_dir = project_dir + one_up_dir = os.path.dirname(root_dir) + while os.path.exists(os.path.join(one_up_dir, ".svn")): + root_dir = os.path.dirname(root_dir) + one_up_dir = os.path.dirname(one_up_dir) + + prefix = os.path.commonprefix([root_dir, project_dir]) + return fullname[len(prefix) + 1 :] + + # Not SVN <= 1.6? Try to find a git, hg, or svn top level directory by + # searching up from the current path. 
+ root_dir = current_dir = os.path.dirname(fullname) + while current_dir != os.path.dirname(current_dir): + if ( + os.path.exists(os.path.join(current_dir, ".git")) + or os.path.exists(os.path.join(current_dir, ".hg")) + or os.path.exists(os.path.join(current_dir, ".svn")) + ): + root_dir = current_dir + break + current_dir = os.path.dirname(current_dir) + + if ( + os.path.exists(os.path.join(root_dir, ".git")) + or os.path.exists(os.path.join(root_dir, ".hg")) + or os.path.exists(os.path.join(root_dir, ".svn")) + ): + prefix = os.path.commonprefix([root_dir, project_dir]) + return fullname[len(prefix) + 1 :] + + # Don't know what to do; header guard warnings may be wrong... + return fullname + + def Split(self): + """Splits the file into the directory, basename, and extension. + + For 'chrome/browser/browser.cc', Split() would + return ('chrome/browser', 'browser', '.cc') + + Returns: + A tuple of (directory, basename, extension). + """ + + googlename = self.RepositoryName() + project, rest = os.path.split(googlename) + return (project,) + os.path.splitext(rest) + + def BaseName(self): + """File base name - text after the final slash, before the final period.""" + return self.Split()[1] + + def Extension(self): + """File extension - text following the final period, includes that period.""" + return self.Split()[2] + + def NoExtension(self): + """File has no source file extension.""" + return "/".join(self.Split()[0:2]) + + def IsSource(self): + """File has a source file extension.""" + return _IsSourceExtension(self.Extension()[1:]) - def Extension(self): - """File extension - text following the final period, includes that period.""" - return self.Split()[2] - def NoExtension(self): - """File has no source file extension.""" - return '/'.join(self.Split()[0:2]) +def _ShouldPrintError(category, confidence, filename, linenum): + """If confidence >= verbose, category passes filter and is not suppressed.""" - def IsSource(self): - """File has a source file 
extension.""" - return _IsSourceExtension(self.Extension()[1:]) + # There are three ways we might decide not to print an error message: + # a "NOLINT(category)" comment appears in the source, + # the verbosity level isn't high enough, or the filters filter it out. + if IsErrorSuppressedByNolint(category, linenum): + return False + if confidence < _cpplint_state.verbose_level: + return False -def _ShouldPrintError(category, confidence, filename, linenum): - """If confidence >= verbose, category passes filter and is not suppressed.""" + is_filtered = False + for one_filter in _Filters(): + filter_cat, filter_file, filter_line = _ParseFilterSelector(one_filter[1:]) + category_match = category.startswith(filter_cat) + file_match = filter_file == "" or filter_file == filename + line_match = filter_line == linenum or filter_line == -1 + + if one_filter.startswith("-"): + if category_match and file_match and line_match: + is_filtered = True + elif one_filter.startswith("+"): + if category_match and file_match and line_match: + is_filtered = False + else: + assert False # should have been checked for in SetFilter. + if is_filtered: + return False - # There are three ways we might decide not to print an error message: - # a "NOLINT(category)" comment appears in the source, - # the verbosity level isn't high enough, or the filters filter it out. 
- if IsErrorSuppressedByNolint(category, linenum): - return False + return True - if confidence < _cpplint_state.verbose_level: - return False - is_filtered = False - for one_filter in _Filters(): - filter_cat, filter_file, filter_line = _ParseFilterSelector(one_filter[1:]) - category_match = category.startswith(filter_cat) - file_match = filter_file == "" or filter_file == filename - line_match = filter_line == linenum or filter_line == -1 - - if one_filter.startswith('-'): - if category_match and file_match and line_match: - is_filtered = True - elif one_filter.startswith('+'): - if category_match and file_match and line_match: - is_filtered = False - else: - assert False # should have been checked for in SetFilter. - if is_filtered: - return False +def Error(filename, linenum, category, confidence, message): + """Logs the fact we've found a lint error. - return True + We log where the error was found, and also our confidence in the error, + that is, how certain we are this is a legitimate style regression, and + not a misidentification or a use that's sometimes justified. + False positives can be suppressed by the use of "NOLINT(category)" + comments, NOLINTNEXTLINE or in blocks started by NOLINTBEGIN. These + are parsed into _error_suppressions. -def Error(filename, linenum, category, confidence, message): - """Logs the fact we've found a lint error. - - We log where the error was found, and also our confidence in the error, - that is, how certain we are this is a legitimate style regression, and - not a misidentification or a use that's sometimes justified. - - False positives can be suppressed by the use of "NOLINT(category)" - comments, NOLINTNEXTLINE or in blocks started by NOLINTBEGIN. These - are parsed into _error_suppressions. - - Args: - filename: The name of the file containing the error. - linenum: The number of the line containing the error. - category: A string used to describe the "category" this bug - falls under: "whitespace", say, or "runtime". 
Categories - may have a hierarchy separated by slashes: "whitespace/indent". - confidence: A number from 1-5 representing a confidence score for - the error, with 5 meaning that we are certain of the problem, - and 1 meaning that it could be a legitimate construct. - message: The error message. - """ - if _ShouldPrintError(category, confidence, filename, linenum): - _cpplint_state.IncrementErrorCount(category) - if _cpplint_state.output_format == 'vs7': - _cpplint_state.PrintError(f'{filename}({linenum}): error cpplint:' - f' [{category}] {message} [{confidence}]\n') - elif _cpplint_state.output_format == 'eclipse': - sys.stderr.write(f'{filename}:{linenum}: warning:' - f' {message} [{category}] [{confidence}]\n') - elif _cpplint_state.output_format == 'junit': - _cpplint_state.AddJUnitFailure(filename, linenum, message, category, confidence) - elif _cpplint_state.output_format in ['sed', 'gsed']: - if message in _SED_FIXUPS: - sys.stdout.write(f"{_cpplint_state.output_format} -i" - f" '{linenum}{_SED_FIXUPS[message]}' {filename}" - f" # {message} [{category}] [{confidence}]\n") - else: - sys.stderr.write(f'# {filename}:{linenum}: ' - f' "{message}" [{category}] [{confidence}]\n') - else: - final_message = (f'{filename}:{linenum}: ' - f' {message} [{category}] [{confidence}]\n') - sys.stderr.write(final_message) + Args: + filename: The name of the file containing the error. + linenum: The number of the line containing the error. + category: A string used to describe the "category" this bug + falls under: "whitespace", say, or "runtime". Categories + may have a hierarchy separated by slashes: "whitespace/indent". + confidence: A number from 1-5 representing a confidence score for + the error, with 5 meaning that we are certain of the problem, + and 1 meaning that it could be a legitimate construct. + message: The error message. 
+ """ + if _ShouldPrintError(category, confidence, filename, linenum): + _cpplint_state.IncrementErrorCount(category) + if _cpplint_state.output_format == "vs7": + _cpplint_state.PrintError( + f"{filename}({linenum}): error cpplint:" + f" [{category}] {message} [{confidence}]\n" + ) + elif _cpplint_state.output_format == "eclipse": + sys.stderr.write( + f"{filename}:{linenum}: warning:" + f" {message} [{category}] [{confidence}]\n" + ) + elif _cpplint_state.output_format == "junit": + _cpplint_state.AddJUnitFailure( + filename, linenum, message, category, confidence + ) + elif _cpplint_state.output_format in ["sed", "gsed"]: + if message in _SED_FIXUPS: + sys.stdout.write( + f"{_cpplint_state.output_format} -i" + f" '{linenum}{_SED_FIXUPS[message]}' {filename}" + f" # {message} [{category}] [{confidence}]\n" + ) + else: + sys.stderr.write( + f"# {filename}:{linenum}: " + f' "{message}" [{category}] [{confidence}]\n' + ) + else: + final_message = ( + f"{filename}:{linenum}: " f" {message} [{category}] [{confidence}]\n" + ) + sys.stderr.write(final_message) # Matches standard C++ escape sequences per 2.13.2.3 of the C++ standard. -_RE_PATTERN_CLEANSE_LINE_ESCAPES = re.compile( - r'\\([abfnrtv?"\\\']|\d+|x[0-9a-fA-F]+)') +_RE_PATTERN_CLEANSE_LINE_ESCAPES = re.compile(r'\\([abfnrtv?"\\\']|\d+|x[0-9a-fA-F]+)') # Match a single C style comment on the same line. -_RE_PATTERN_C_COMMENTS = r'/\*(?:[^*]|\*(?!/))*\*/' +_RE_PATTERN_C_COMMENTS = r"/\*(?:[^*]|\*(?!/))*\*/" # Matches multi-line C style comments. # This RE is a little bit more complicated than one might expect, because we # have to take care of space removals tools so we can handle comments inside @@ -1836,866 +1930,958 @@ def Error(filename, linenum, category, confidence, message): # if this doesn't work we try on left side but only if there's a non-character # on the right. 
_RE_PATTERN_CLEANSE_LINE_C_COMMENTS = re.compile( - r'(\s*' + _RE_PATTERN_C_COMMENTS + r'\s*$|' + - _RE_PATTERN_C_COMMENTS + r'\s+|' + - r'\s+' + _RE_PATTERN_C_COMMENTS + r'(?=\W)|' + - _RE_PATTERN_C_COMMENTS + r')') + r"(\s*" + + _RE_PATTERN_C_COMMENTS + + r"\s*$|" + + _RE_PATTERN_C_COMMENTS + + r"\s+|" + + r"\s+" + + _RE_PATTERN_C_COMMENTS + + r"(?=\W)|" + + _RE_PATTERN_C_COMMENTS + + r")" +) def IsCppString(line): - """Does line terminate so, that the next symbol is in string constant. + """Does line terminate so, that the next symbol is in string constant. - This function does not consider single-line nor multi-line comments. + This function does not consider single-line nor multi-line comments. - Args: - line: is a partial line of code starting from the 0..n. + Args: + line: is a partial line of code starting from the 0..n. - Returns: - True, if next character appended to 'line' is inside a - string constant. - """ + Returns: + True, if next character appended to 'line' is inside a + string constant. + """ - line = line.replace(r'\\', 'XX') # after this, \\" does not match to \" - return ((line.count('"') - line.count(r'\"') - line.count("'\"'")) & 1) == 1 + line = line.replace(r"\\", "XX") # after this, \\" does not match to \" + return ((line.count('"') - line.count(r"\"") - line.count("'\"'")) & 1) == 1 def CleanseRawStrings(raw_lines): - """Removes C++11 raw strings from lines. - - Before: - static const char kData[] = R"( - multi-line string - )"; - - After: - static const char kData[] = "" - (replaced by blank line) - ""; - - Args: - raw_lines: list of raw lines. - - Returns: - list of lines with C++11 raw strings replaced by empty strings. 
- """ - - delimiter = None - lines_without_raw_strings = [] - for line in raw_lines: - if delimiter: - # Inside a raw string, look for the end - end = line.find(delimiter) - if end >= 0: - # Found the end of the string, match leading space for this - # line and resume copying the original lines, and also insert - # a "" on the last line. - leading_space = re.match(r'^(\s*)\S', line) - line = leading_space.group(1) + '""' + line[end + len(delimiter):] - delimiter = None - else: - # Haven't found the end yet, append a blank line. - line = '""' - - # Look for beginning of a raw string, and replace them with - # empty strings. This is done in a loop to handle multiple raw - # strings on the same line. - while delimiter is None: - # Look for beginning of a raw string. - # See 2.14.15 [lex.string] for syntax. - # - # Once we have matched a raw string, we check the prefix of the - # line to make sure that the line is not part of a single line - # comment. It's done this way because we remove raw strings - # before removing comments as opposed to removing comments - # before removing raw strings. This is because there are some - # cpplint checks that requires the comments to be preserved, but - # we don't want to check comments that are inside raw strings. - matched = re.match(r'^(.*?)\b(?:R|u8R|uR|UR|LR)"([^\s\\()]*)\((.*)$', line) - if (matched and - not re.match(r'^([^\'"]|\'(\\.|[^\'])*\'|"(\\.|[^"])*")*//', - matched.group(1))): - delimiter = ')' + matched.group(2) + '"' - - end = matched.group(3).find(delimiter) - if end >= 0: - # Raw string ended on same line - line = (matched.group(1) + '""' + - matched.group(3)[end + len(delimiter):]) - delimiter = None - else: - # Start of a multi-line raw string - line = matched.group(1) + '""' - else: - break + """Removes C++11 raw strings from lines. 
+ + Before: + static const char kData[] = R"( + multi-line string + )"; + + After: + static const char kData[] = "" + (replaced by blank line) + ""; - lines_without_raw_strings.append(line) + Args: + raw_lines: list of raw lines. + + Returns: + list of lines with C++11 raw strings replaced by empty strings. + """ - # TODO(unknown): if delimiter is not None here, we might want to - # emit a warning for unterminated string. - return lines_without_raw_strings + delimiter = None + lines_without_raw_strings = [] + for line in raw_lines: + if delimiter: + # Inside a raw string, look for the end + end = line.find(delimiter) + if end >= 0: + # Found the end of the string, match leading space for this + # line and resume copying the original lines, and also insert + # a "" on the last line. + leading_space = re.match(r"^(\s*)\S", line) + line = leading_space.group(1) + '""' + line[end + len(delimiter) :] + delimiter = None + else: + # Haven't found the end yet, append a blank line. + line = '""' + + # Look for beginning of a raw string, and replace them with + # empty strings. This is done in a loop to handle multiple raw + # strings on the same line. + while delimiter is None: + # Look for beginning of a raw string. + # See 2.14.15 [lex.string] for syntax. + # + # Once we have matched a raw string, we check the prefix of the + # line to make sure that the line is not part of a single line + # comment. It's done this way because we remove raw strings + # before removing comments as opposed to removing comments + # before removing raw strings. This is because there are some + # cpplint checks that requires the comments to be preserved, but + # we don't want to check comments that are inside raw strings. 
+ matched = re.match(r'^(.*?)\b(?:R|u8R|uR|UR|LR)"([^\s\\()]*)\((.*)$', line) + if matched and not re.match( + r'^([^\'"]|\'(\\.|[^\'])*\'|"(\\.|[^"])*")*//', matched.group(1) + ): + delimiter = ")" + matched.group(2) + '"' + + end = matched.group(3).find(delimiter) + if end >= 0: + # Raw string ended on same line + line = ( + matched.group(1) + + '""' + + matched.group(3)[end + len(delimiter) :] + ) + delimiter = None + else: + # Start of a multi-line raw string + line = matched.group(1) + '""' + else: + break + + lines_without_raw_strings.append(line) + + # TODO(unknown): if delimiter is not None here, we might want to + # emit a warning for unterminated string. + return lines_without_raw_strings def FindNextMultiLineCommentStart(lines, lineix): - """Find the beginning marker for a multiline comment.""" - while lineix < len(lines): - if lines[lineix].strip().startswith('/*'): - # Only return this marker if the comment goes beyond this line - if lines[lineix].strip().find('*/', 2) < 0: - return lineix - lineix += 1 - return len(lines) + """Find the beginning marker for a multiline comment.""" + while lineix < len(lines): + if lines[lineix].strip().startswith("/*"): + # Only return this marker if the comment goes beyond this line + if lines[lineix].strip().find("*/", 2) < 0: + return lineix + lineix += 1 + return len(lines) def FindNextMultiLineCommentEnd(lines, lineix): - """We are inside a comment, find the end marker.""" - while lineix < len(lines): - if lines[lineix].strip().endswith('*/'): - return lineix - lineix += 1 - return len(lines) + """We are inside a comment, find the end marker.""" + while lineix < len(lines): + if lines[lineix].strip().endswith("*/"): + return lineix + lineix += 1 + return len(lines) def RemoveMultiLineCommentsFromRange(lines, begin, end): - """Clears a range of lines for multi-line comments.""" - # Having // comments makes the lines non-empty, so we will not get - # unnecessary blank line warnings later in the code. 
- for i in range(begin, end): - lines[i] = '/**/' + """Clears a range of lines for multi-line comments.""" + # Having // comments makes the lines non-empty, so we will not get + # unnecessary blank line warnings later in the code. + for i in range(begin, end): + lines[i] = "/**/" def RemoveMultiLineComments(filename, lines, error): - """Removes multiline (c-style) comments from lines.""" - lineix = 0 - while lineix < len(lines): - lineix_begin = FindNextMultiLineCommentStart(lines, lineix) - if lineix_begin >= len(lines): - return - lineix_end = FindNextMultiLineCommentEnd(lines, lineix_begin) - if lineix_end >= len(lines): - error(filename, lineix_begin + 1, 'readability/multiline_comment', 5, - 'Could not find end of multi-line comment') - return - RemoveMultiLineCommentsFromRange(lines, lineix_begin, lineix_end + 1) - lineix = lineix_end + 1 + """Removes multiline (c-style) comments from lines.""" + lineix = 0 + while lineix < len(lines): + lineix_begin = FindNextMultiLineCommentStart(lines, lineix) + if lineix_begin >= len(lines): + return + lineix_end = FindNextMultiLineCommentEnd(lines, lineix_begin) + if lineix_end >= len(lines): + error( + filename, + lineix_begin + 1, + "readability/multiline_comment", + 5, + "Could not find end of multi-line comment", + ) + return + RemoveMultiLineCommentsFromRange(lines, lineix_begin, lineix_end + 1) + lineix = lineix_end + 1 def CleanseComments(line): - """Removes //-comments and single-line C-style /* */ comments. + """Removes //-comments and single-line C-style /* */ comments. - Args: - line: A line of C++ source. + Args: + line: A line of C++ source. - Returns: - The line with single-line comments removed. - """ - commentpos = line.find('//') - if commentpos != -1 and not IsCppString(line[:commentpos]): - line = line[:commentpos].rstrip() - # get rid of /* ... */ - return _RE_PATTERN_CLEANSE_LINE_C_COMMENTS.sub('', line) + Returns: + The line with single-line comments removed. 
+ """ + commentpos = line.find("//") + if commentpos != -1 and not IsCppString(line[:commentpos]): + line = line[:commentpos].rstrip() + # get rid of /* ... */ + return _RE_PATTERN_CLEANSE_LINE_C_COMMENTS.sub("", line) def ReplaceAlternateTokens(line): - """Replace any alternate token by its original counterpart. + """Replace any alternate token by its original counterpart. - In order to comply with the google rule stating that unary operators should - never be followed by a space, an exception is made for the 'not' and 'compl' - alternate tokens. For these, any trailing space is removed during the - conversion. + In order to comply with the google rule stating that unary operators should + never be followed by a space, an exception is made for the 'not' and 'compl' + alternate tokens. For these, any trailing space is removed during the + conversion. - Args: - line: The line being processed. + Args: + line: The line being processed. - Returns: - The line with alternate tokens replaced. - """ - for match in _ALT_TOKEN_REPLACEMENT_PATTERN.finditer(line): - token = _ALT_TOKEN_REPLACEMENT[match.group(2)] - tail = '' if match.group(2) in ['not', 'compl'] and match.group(3) == ' ' \ - else r'\3' - line = re.sub(match.re, rf'\1{token}{tail}', line, count=1) - return line + Returns: + The line with alternate tokens replaced. + """ + for match in _ALT_TOKEN_REPLACEMENT_PATTERN.finditer(line): + token = _ALT_TOKEN_REPLACEMENT[match.group(2)] + tail = ( + "" + if match.group(2) in ["not", "compl"] and match.group(3) == " " + else r"\3" + ) + line = re.sub(match.re, rf"\1{token}{tail}", line, count=1) + return line class CleansedLines(object): - """Holds 4 copies of all lines with different preprocessing applied to them. - - 1) elided member contains lines without strings and comments. - 2) lines member contains lines without comments. - 3) raw_lines member contains all the lines without processing. 
- 4) lines_without_raw_strings member is same as raw_lines, but with C++11 raw - strings removed. - All these members are of , and of the same length. - """ - - def __init__(self, lines): - if '-readability/alt_tokens' in _cpplint_state.filters: - for i, line in enumerate(lines): - lines[i] = ReplaceAlternateTokens(line) - self.elided = [] - self.lines = [] - self.raw_lines = lines - self.num_lines = len(lines) - self.lines_without_raw_strings = CleanseRawStrings(lines) - for line in self.lines_without_raw_strings: - self.lines.append(CleanseComments(line)) - elided = self._CollapseStrings(line) - self.elided.append(CleanseComments(elided)) - - def NumLines(self): - """Returns the number of lines represented.""" - return self.num_lines - - @staticmethod - def _CollapseStrings(elided): - """Collapses strings and chars on a line to simple "" or '' blocks. - - We nix strings first so we're not fooled by text like '"http://"' + """Holds 4 copies of all lines with different preprocessing applied to them. + + 1) elided member contains lines without strings and comments. + 2) lines member contains lines without comments. + 3) raw_lines member contains all the lines without processing. + 4) lines_without_raw_strings member is same as raw_lines, but with C++11 raw + strings removed. + All these members are of , and of the same length. 
+ """ + + def __init__(self, lines): + if "-readability/alt_tokens" in _cpplint_state.filters: + for i, line in enumerate(lines): + lines[i] = ReplaceAlternateTokens(line) + self.elided = [] + self.lines = [] + self.raw_lines = lines + self.num_lines = len(lines) + self.lines_without_raw_strings = CleanseRawStrings(lines) + for line in self.lines_without_raw_strings: + self.lines.append(CleanseComments(line)) + elided = self._CollapseStrings(line) + self.elided.append(CleanseComments(elided)) + + def NumLines(self): + """Returns the number of lines represented.""" + return self.num_lines + + @staticmethod + def _CollapseStrings(elided): + """Collapses strings and chars on a line to simple "" or '' blocks. + + We nix strings first so we're not fooled by text like '"http://"' + + Args: + elided: The line being processed. + + Returns: + The line with collapsed strings. + """ + if _RE_PATTERN_INCLUDE.match(elided): + return elided + + # Remove escaped characters first to make quote/single quote collapsing + # basic. Things that look like escaped characters shouldn't occur + # outside of strings and chars. + elided = _RE_PATTERN_CLEANSE_LINE_ESCAPES.sub("", elided) + + # Replace quoted strings and digit separators. Both single quotes + # and double quotes are processed in the same loop, otherwise + # nested quotes wouldn't work. + collapsed = "" + while True: + # Find the first quote character + match = re.match(r'^([^\'"]*)([\'"])(.*)$', elided) + if not match: + collapsed += elided + break + head, quote, tail = match.groups() + + if quote == '"': + # Collapse double quoted strings + second_quote = tail.find('"') + if second_quote >= 0: + collapsed += head + '""' + elided = tail[second_quote + 1 :] + else: + # Unmatched double quote, don't bother processing the rest + # of the line since this is probably a multiline string. + collapsed += elided + break + else: + # Found single quote, check nearby text to eliminate digit separators. 
+ # + # There is no special handling for floating point here, because + # the integer/fractional/exponent parts would all be parsed + # correctly as long as there are digits on both sides of the + # separator. So we are fine as long as we don't see something + # like "0.'3" (gcc 4.9.0 will not allow this literal). + if re.search(r"\b(?:0[bBxX]?|[1-9])[0-9a-fA-F]*$", head): + match_literal = re.match( + r"^((?:\'?[0-9a-zA-Z_])*)(.*)$", "'" + tail + ) + collapsed += head + match_literal.group(1).replace("'", "") + elided = match_literal.group(2) + else: + second_quote = tail.find("'") + if second_quote >= 0: + collapsed += head + "''" + elided = tail[second_quote + 1 :] + else: + # Unmatched single quote + collapsed += elided + break + + return collapsed + + +def FindEndOfExpressionInLine(line, startpos, stack): + """Find the position just after the end of current parenthesized expression. Args: - elided: The line being processed. + line: a CleansedLines line. + startpos: start searching at this position. + stack: nesting stack at startpos. Returns: - The line with collapsed strings. + On finding matching end: (index just after matching end, None) + On finding an unclosed expression: (-1, None) + Otherwise: (-1, new stack at end of this line) """ - if _RE_PATTERN_INCLUDE.match(elided): - return elided - - # Remove escaped characters first to make quote/single quote collapsing - # basic. Things that look like escaped characters shouldn't occur - # outside of strings and chars. - elided = _RE_PATTERN_CLEANSE_LINE_ESCAPES.sub('', elided) - - # Replace quoted strings and digit separators. Both single quotes - # and double quotes are processed in the same loop, otherwise - # nested quotes wouldn't work. 
- collapsed = '' - while True: - # Find the first quote character - match = re.match(r'^([^\'"]*)([\'"])(.*)$', elided) - if not match: - collapsed += elided - break - head, quote, tail = match.groups() - - if quote == '"': - # Collapse double quoted strings - second_quote = tail.find('"') - if second_quote >= 0: - collapsed += head + '""' - elided = tail[second_quote + 1:] - else: - # Unmatched double quote, don't bother processing the rest - # of the line since this is probably a multiline string. - collapsed += elided - break - else: - # Found single quote, check nearby text to eliminate digit separators. - # - # There is no special handling for floating point here, because - # the integer/fractional/exponent parts would all be parsed - # correctly as long as there are digits on both sides of the - # separator. So we are fine as long as we don't see something - # like "0.'3" (gcc 4.9.0 will not allow this literal). - if re.search(r'\b(?:0[bBxX]?|[1-9])[0-9a-fA-F]*$', head): - match_literal = re.match(r'^((?:\'?[0-9a-zA-Z_])*)(.*)$', "'" + tail) - collapsed += head + match_literal.group(1).replace("'", '') - elided = match_literal.group(2) - else: - second_quote = tail.find('\'') - if second_quote >= 0: - collapsed += head + "''" - elided = tail[second_quote + 1:] - else: - # Unmatched single quote - collapsed += elided - break + for i in range(startpos, len(line)): + char = line[i] + if char in "([{": + # Found start of parenthesized expression, push to expression stack + stack.append(char) + elif char == "<": + # Found potential start of template argument list + if i > 0 and line[i - 1] == "<": + # Left shift operator + if stack and stack[-1] == "<": + stack.pop() + if not stack: + return (-1, None) + elif i > 0 and re.search(r"\boperator\s*$", line[0:i]): + # operator<, don't add to stack + continue + else: + # Tentative start of template argument list + stack.append("<") + elif char in ")]}": + # Found end of parenthesized expression. 
+ # + # If we are currently expecting a matching '>', the pending '<' + # must have been an operator. Remove them from expression stack. + while stack and stack[-1] == "<": + stack.pop() + if not stack: + return (-1, None) + if ( + (stack[-1] == "(" and char == ")") + or (stack[-1] == "[" and char == "]") + or (stack[-1] == "{" and char == "}") + ): + stack.pop() + if not stack: + return (i + 1, None) + else: + # Mismatched parentheses + return (-1, None) + elif char == ">": + # Found potential end of template argument list. + + # Ignore "->" and operator functions + if i > 0 and ( + line[i - 1] == "-" or re.search(r"\boperator\s*$", line[0 : i - 1]) + ): + continue + + # Pop the stack if there is a matching '<'. Otherwise, ignore + # this '>' since it must be an operator. + if stack: + if stack[-1] == "<": + stack.pop() + if not stack: + return (i + 1, None) + elif char == ";": + # Found something that look like end of statements. If we are currently + # expecting a '>', the matching '<' must have been an operator, since + # template argument list should not contain statements. + while stack and stack[-1] == "<": + stack.pop() + if not stack: + return (-1, None) + + # Did not find end of expression or unbalanced parentheses on this line + return (-1, stack) - return collapsed +def CloseExpression(clean_lines, linenum, pos): + """If input points to ( or { or [ or <, finds the position that closes it. -def FindEndOfExpressionInLine(line, startpos, stack): - """Find the position just after the end of current parenthesized expression. - - Args: - line: a CleansedLines line. - startpos: start searching at this position. - stack: nesting stack at startpos. 
- - Returns: - On finding matching end: (index just after matching end, None) - On finding an unclosed expression: (-1, None) - Otherwise: (-1, new stack at end of this line) - """ - for i in range(startpos, len(line)): - char = line[i] - if char in '([{': - # Found start of parenthesized expression, push to expression stack - stack.append(char) - elif char == '<': - # Found potential start of template argument list - if i > 0 and line[i - 1] == '<': - # Left shift operator - if stack and stack[-1] == '<': - stack.pop() - if not stack: - return (-1, None) - elif i > 0 and re.search(r'\boperator\s*$', line[0:i]): - # operator<, don't add to stack - continue - else: - # Tentative start of template argument list - stack.append('<') - elif char in ')]}': - # Found end of parenthesized expression. - # - # If we are currently expecting a matching '>', the pending '<' - # must have been an operator. Remove them from expression stack. - while stack and stack[-1] == '<': - stack.pop() - if not stack: - return (-1, None) - if ((stack[-1] == '(' and char == ')') or - (stack[-1] == '[' and char == ']') or - (stack[-1] == '{' and char == '}')): - stack.pop() - if not stack: - return (i + 1, None) - else: - # Mismatched parentheses - return (-1, None) - elif char == '>': - # Found potential end of template argument list. - - # Ignore "->" and operator functions - if (i > 0 and - (line[i - 1] == '-' or re.search(r'\boperator\s*$', line[0:i - 1]))): - continue - - # Pop the stack if there is a matching '<'. Otherwise, ignore - # this '>' since it must be an operator. - if stack: - if stack[-1] == '<': - stack.pop() - if not stack: - return (i + 1, None) - elif char == ';': - # Found something that look like end of statements. If we are currently - # expecting a '>', the matching '<' must have been an operator, since - # template argument list should not contain statements. 
- while stack and stack[-1] == '<': - stack.pop() - if not stack: - return (-1, None) - - # Did not find end of expression or unbalanced parentheses on this line - return (-1, stack) + If lines[linenum][pos] points to a '(' or '{' or '[' or '<', finds the + linenum/pos that correspond to the closing of the expression. + TODO(unknown): cpplint spends a fair bit of time matching parentheses. + Ideally we would want to index all opening and closing parentheses once + and have CloseExpression be just a simple lookup, but due to preprocessor + tricks, this is not so easy. -def CloseExpression(clean_lines, linenum, pos): - """If input points to ( or { or [ or <, finds the position that closes it. - - If lines[linenum][pos] points to a '(' or '{' or '[' or '<', finds the - linenum/pos that correspond to the closing of the expression. - - TODO(unknown): cpplint spends a fair bit of time matching parentheses. - Ideally we would want to index all opening and closing parentheses once - and have CloseExpression be just a simple lookup, but due to preprocessor - tricks, this is not so easy. - - Args: - clean_lines: A CleansedLines instance containing the file. - linenum: The number of the line to check. - pos: A position on the line. - - Returns: - A tuple (line, linenum, pos) pointer *past* the closing brace, or - (line, len(lines), -1) if we never find a close. Note we ignore - strings and comments when matching; and the line we return is the - 'cleansed' line at linenum. - """ - - line = clean_lines.elided[linenum] - if (line[pos] not in '({[<') or re.match(r'<[<=]', line[pos:]): - return (line, clean_lines.NumLines(), -1) + Args: + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + pos: A position on the line. 
- # Check first line - (end_pos, stack) = FindEndOfExpressionInLine(line, pos, []) - if end_pos > -1: - return (line, linenum, end_pos) + Returns: + A tuple (line, linenum, pos) pointer *past* the closing brace, or + (line, len(lines), -1) if we never find a close. Note we ignore + strings and comments when matching; and the line we return is the + 'cleansed' line at linenum. + """ - # Continue scanning forward - while stack and linenum < clean_lines.NumLines() - 1: - linenum += 1 line = clean_lines.elided[linenum] - (end_pos, stack) = FindEndOfExpressionInLine(line, 0, stack) + if (line[pos] not in "({[<") or re.match(r"<[<=]", line[pos:]): + return (line, clean_lines.NumLines(), -1) + + # Check first line + (end_pos, stack) = FindEndOfExpressionInLine(line, pos, []) if end_pos > -1: - return (line, linenum, end_pos) + return (line, linenum, end_pos) + + # Continue scanning forward + while stack and linenum < clean_lines.NumLines() - 1: + linenum += 1 + line = clean_lines.elided[linenum] + (end_pos, stack) = FindEndOfExpressionInLine(line, 0, stack) + if end_pos > -1: + return (line, linenum, end_pos) - # Did not find end of expression before end of file, give up - return (line, clean_lines.NumLines(), -1) + # Did not find end of expression before end of file, give up + return (line, clean_lines.NumLines(), -1) def FindStartOfExpressionInLine(line, endpos, stack): - """Find position at the matching start of current expression. - - This is almost the reverse of FindEndOfExpressionInLine, but note - that the input position and returned position differs by 1. - - Args: - line: a CleansedLines line. - endpos: start searching at this position. - stack: nesting stack at endpos. 
- - Returns: - On finding matching start: (index at matching start, None) - On finding an unclosed expression: (-1, None) - Otherwise: (-1, new stack at beginning of this line) - """ - i = endpos - while i >= 0: - char = line[i] - if char in ')]}': - # Found end of expression, push to expression stack - stack.append(char) - elif char == '>': - # Found potential end of template argument list. - # - # Ignore it if it's a "->" or ">=" or "operator>" - if (i > 0 and - (line[i - 1] == '-' or - re.match(r'\s>=\s', line[i - 1:]) or - re.search(r'\boperator\s*$', line[0:i]))): - i -= 1 - else: - stack.append('>') - elif char == '<': - # Found potential start of template argument list - if i > 0 and line[i - 1] == '<': - # Left shift operator + """Find position at the matching start of current expression. + + This is almost the reverse of FindEndOfExpressionInLine, but note + that the input position and returned position differs by 1. + + Args: + line: a CleansedLines line. + endpos: start searching at this position. + stack: nesting stack at endpos. + + Returns: + On finding matching start: (index at matching start, None) + On finding an unclosed expression: (-1, None) + Otherwise: (-1, new stack at beginning of this line) + """ + i = endpos + while i >= 0: + char = line[i] + if char in ")]}": + # Found end of expression, push to expression stack + stack.append(char) + elif char == ">": + # Found potential end of template argument list. + # + # Ignore it if it's a "->" or ">=" or "operator>" + if i > 0 and ( + line[i - 1] == "-" + or re.match(r"\s>=\s", line[i - 1 :]) + or re.search(r"\boperator\s*$", line[0:i]) + ): + i -= 1 + else: + stack.append(">") + elif char == "<": + # Found potential start of template argument list + if i > 0 and line[i - 1] == "<": + # Left shift operator + i -= 1 + else: + # If there is a matching '>', we can pop the expression stack. + # Otherwise, ignore this '<' since it must be an operator. 
+ if stack and stack[-1] == ">": + stack.pop() + if not stack: + return (i, None) + elif char in "([{": + # Found start of expression. + # + # If there are any unmatched '>' on the stack, they must be + # operators. Remove those. + while stack and stack[-1] == ">": + stack.pop() + if not stack: + return (-1, None) + if ( + (char == "(" and stack[-1] == ")") + or (char == "[" and stack[-1] == "]") + or (char == "{" and stack[-1] == "}") + ): + stack.pop() + if not stack: + return (i, None) + else: + # Mismatched parentheses + return (-1, None) + elif char == ";": + # Found something that look like end of statements. If we are currently + # expecting a '<', the matching '>' must have been an operator, since + # template argument list should not contain statements. + while stack and stack[-1] == ">": + stack.pop() + if not stack: + return (-1, None) + i -= 1 - else: - # If there is a matching '>', we can pop the expression stack. - # Otherwise, ignore this '<' since it must be an operator. - if stack and stack[-1] == '>': - stack.pop() - if not stack: - return (i, None) - elif char in '([{': - # Found start of expression. - # - # If there are any unmatched '>' on the stack, they must be - # operators. Remove those. - while stack and stack[-1] == '>': - stack.pop() - if not stack: - return (-1, None) - if ((char == '(' and stack[-1] == ')') or - (char == '[' and stack[-1] == ']') or - (char == '{' and stack[-1] == '}')): - stack.pop() - if not stack: - return (i, None) - else: - # Mismatched parentheses - return (-1, None) - elif char == ';': - # Found something that look like end of statements. If we are currently - # expecting a '<', the matching '>' must have been an operator, since - # template argument list should not contain statements. 
- while stack and stack[-1] == '>': - stack.pop() - if not stack: - return (-1, None) - - i -= 1 - - return (-1, stack) + + return (-1, stack) def ReverseCloseExpression(clean_lines, linenum, pos): - """If input points to ) or } or ] or >, finds the position that opens it. - - If lines[linenum][pos] points to a ')' or '}' or ']' or '>', finds the - linenum/pos that correspond to the opening of the expression. - - Args: - clean_lines: A CleansedLines instance containing the file. - linenum: The number of the line to check. - pos: A position on the line. - - Returns: - A tuple (line, linenum, pos) pointer *at* the opening brace, or - (line, 0, -1) if we never find the matching opening brace. Note - we ignore strings and comments when matching; and the line we - return is the 'cleansed' line at linenum. - """ - line = clean_lines.elided[linenum] - if line[pos] not in ')}]>': - return (line, 0, -1) + """If input points to ) or } or ] or >, finds the position that opens it. + + If lines[linenum][pos] points to a ')' or '}' or ']' or '>', finds the + linenum/pos that correspond to the opening of the expression. - # Check last line - (start_pos, stack) = FindStartOfExpressionInLine(line, pos, []) - if start_pos > -1: - return (line, linenum, start_pos) + Args: + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + pos: A position on the line. - # Continue scanning backward - while stack and linenum > 0: - linenum -= 1 + Returns: + A tuple (line, linenum, pos) pointer *at* the opening brace, or + (line, 0, -1) if we never find the matching opening brace. Note + we ignore strings and comments when matching; and the line we + return is the 'cleansed' line at linenum. 
+ """ line = clean_lines.elided[linenum] - (start_pos, stack) = FindStartOfExpressionInLine(line, len(line) - 1, stack) + if line[pos] not in ")}]>": + return (line, 0, -1) + + # Check last line + (start_pos, stack) = FindStartOfExpressionInLine(line, pos, []) if start_pos > -1: - return (line, linenum, start_pos) + return (line, linenum, start_pos) - # Did not find start of expression before beginning of file, give up - return (line, 0, -1) + # Continue scanning backward + while stack and linenum > 0: + linenum -= 1 + line = clean_lines.elided[linenum] + (start_pos, stack) = FindStartOfExpressionInLine(line, len(line) - 1, stack) + if start_pos > -1: + return (line, linenum, start_pos) + + # Did not find start of expression before beginning of file, give up + return (line, 0, -1) def CheckForCopyright(filename, lines, error): - """Logs an error if no Copyright message appears at the top of the file.""" + """Logs an error if no Copyright message appears at the top of the file.""" - # We'll say it should occur by line 10. Don't forget there's a - # placeholder line at the front. - for line in range(1, min(len(lines), 11)): - if re.search(r'Copyright', lines[line], re.I): break - else: # means no copyright line was found - error(filename, 0, 'legal/copyright', 5, - 'No copyright message found. ' - 'You should have a line: "Copyright [year] "') + # We'll say it should occur by line 10. Don't forget there's a + # placeholder line at the front. + for line in range(1, min(len(lines), 11)): + if re.search(r"Copyright", lines[line], re.I): + break + else: # means no copyright line was found + error( + filename, + 0, + "legal/copyright", + 5, + "No copyright message found. " + 'You should have a line: "Copyright [year] "', + ) def GetIndentLevel(line): - """Return the number of leading spaces in line. + """Return the number of leading spaces in line. + + Args: + line: A string to check. - Args: - line: A string to check. 
+ Returns: + An integer count of leading spaces, possibly zero. + """ + indent = re.match(r"^( *)\S", line) + if indent: + return len(indent.group(1)) + else: + return 0 - Returns: - An integer count of leading spaces, possibly zero. - """ - indent = re.match(r'^( *)\S', line) - if indent: - return len(indent.group(1)) - else: - return 0 def PathSplitToList(path): - """Returns the path split into a list by the separator. - - Args: - path: An absolute or relative path (e.g. '/a/b/c/' or '../a') - - Returns: - A list of path components (e.g. ['a', 'b', 'c]). - """ - lst = [] - while True: - (head, tail) = os.path.split(path) - if head == path: # absolute paths end - lst.append(head) - break - if tail == path: # relative paths end - lst.append(tail) - break - - path = head - lst.append(tail) - - lst.reverse() - return lst + """Returns the path split into a list by the separator. -def GetHeaderGuardCPPVariable(filename): - """Returns the CPP variable that should be used as a header guard. + Args: + path: An absolute or relative path (e.g. '/a/b/c/' or '../a') - Args: - filename: The name of a C++ header file. + Returns: + A list of path components (e.g. ['a', 'b', 'c]). + """ + lst = [] + while True: + (head, tail) = os.path.split(path) + if head == path: # absolute paths end + lst.append(head) + break + if tail == path: # relative paths end + lst.append(tail) + break - Returns: - The CPP variable that should be used as a header guard in the - named file. + path = head + lst.append(tail) - """ + lst.reverse() + return lst - # Restores original filename in case that cpplint is invoked from Emacs's - # flymake. - filename = re.sub(r'_flymake\.h$', '.h', filename) - filename = re.sub(r'/\.flymake/([^/]*)$', r'/\1', filename) - # Replace 'c++' with 'cpp'. 
- filename = filename.replace('C++', 'cpp').replace('c++', 'cpp') - fileinfo = FileInfo(filename) - file_path_from_root = fileinfo.RepositoryName() +def GetHeaderGuardCPPVariable(filename): + """Returns the CPP variable that should be used as a header guard. - def FixupPathFromRoot(): - if _root_debug: - sys.stderr.write(f"\n_root fixup, _root = '{_root}'," - f" repository name = '{fileinfo.RepositoryName()}'\n") + Args: + filename: The name of a C++ header file. - # Process the file path with the --root flag if it was set. - if not _root: - if _root_debug: - sys.stderr.write("_root unspecified\n") - return file_path_from_root + Returns: + The CPP variable that should be used as a header guard in the + named file. - def StripListPrefix(lst, prefix): - # f(['x', 'y'], ['w, z']) -> None (not a valid prefix) - if lst[:len(prefix)] != prefix: - return None - # f(['a, 'b', 'c', 'd'], ['a', 'b']) -> ['c', 'd'] - return lst[(len(prefix)):] + """ - # root behavior: - # --root=subdir , lstrips subdir from the header guard - maybe_path = StripListPrefix(PathSplitToList(file_path_from_root), - PathSplitToList(_root)) + # Restores original filename in case that cpplint is invoked from Emacs's + # flymake. + filename = re.sub(r"_flymake\.h$", ".h", filename) + filename = re.sub(r"/\.flymake/([^/]*)$", r"/\1", filename) + # Replace 'c++' with 'cpp'. + filename = filename.replace("C++", "cpp").replace("c++", "cpp") + + fileinfo = FileInfo(filename) + file_path_from_root = fileinfo.RepositoryName() + + def FixupPathFromRoot(): + if _root_debug: + sys.stderr.write( + f"\n_root fixup, _root = '{_root}'," + f" repository name = '{fileinfo.RepositoryName()}'\n" + ) + + # Process the file path with the --root flag if it was set. 
+ if not _root: + if _root_debug: + sys.stderr.write("_root unspecified\n") + return file_path_from_root + + def StripListPrefix(lst, prefix): + # f(['x', 'y'], ['w, z']) -> None (not a valid prefix) + if lst[: len(prefix)] != prefix: + return None + # f(['a, 'b', 'c', 'd'], ['a', 'b']) -> ['c', 'd'] + return lst[(len(prefix)) :] + + # root behavior: + # --root=subdir , lstrips subdir from the header guard + maybe_path = StripListPrefix( + PathSplitToList(file_path_from_root), PathSplitToList(_root) + ) + + if _root_debug: + sys.stderr.write( + ( + "_root lstrip (maybe_path=%s, file_path_from_root=%s," + + " _root=%s)\n" + ) + % (maybe_path, file_path_from_root, _root) + ) + + if maybe_path: + return os.path.join(*maybe_path) + + # --root=.. , will prepend the outer directory to the header guard + full_path = fileinfo.FullName() + # adapt slashes for windows + root_abspath = os.path.abspath(_root).replace("\\", "/") + + maybe_path = StripListPrefix( + PathSplitToList(full_path), PathSplitToList(root_abspath) + ) + + if _root_debug: + sys.stderr.write( + ("_root prepend (maybe_path=%s, full_path=%s, " + "root_abspath=%s)\n") + % (maybe_path, full_path, root_abspath) + ) + + if maybe_path: + return os.path.join(*maybe_path) + + if _root_debug: + sys.stderr.write(f"_root ignore, returning {file_path_from_root}\n") + + # --root=FAKE_DIR is ignored + return file_path_from_root + + file_path_from_root = FixupPathFromRoot() + return re.sub(r"[^a-zA-Z0-9]", "_", file_path_from_root).upper() + "_" - if _root_debug: - sys.stderr.write(("_root lstrip (maybe_path=%s, file_path_from_root=%s," + - " _root=%s)\n") % (maybe_path, file_path_from_root, _root)) - if maybe_path: - return os.path.join(*maybe_path) +def CheckForHeaderGuard(filename, clean_lines, error): + """Checks that the file contains a header guard. - # --root=.. 
, will prepend the outer directory to the header guard - full_path = fileinfo.FullName() - # adapt slashes for windows - root_abspath = os.path.abspath(_root).replace('\\', '/') + Logs an error if no #ifndef header guard is present. For other + headers, checks that the full pathname is used. - maybe_path = StripListPrefix(PathSplitToList(full_path), - PathSplitToList(root_abspath)) + Args: + filename: The name of the C++ header file. + clean_lines: A CleansedLines instance containing the file. + error: The function to call with any errors found. + """ - if _root_debug: - sys.stderr.write(("_root prepend (maybe_path=%s, full_path=%s, " + - "root_abspath=%s)\n") % (maybe_path, full_path, root_abspath)) + # Don't check for header guards if there are error suppression + # comments somewhere in this file. + # + # Because this is silencing a warning for a nonexistent line, we + # only support the very specific NOLINT(build/header_guard) syntax, + # and not the general NOLINT or NOLINT(*) syntax. + raw_lines = clean_lines.lines_without_raw_strings + for i in raw_lines: + if re.search(r"//\s*NOLINT\(build/header_guard\)", i): + return + + # Allow pragma once instead of header guards + for i in raw_lines: + if re.search(r"^\s*#pragma\s+once", i): + return - if maybe_path: - return os.path.join(*maybe_path) + cppvar = GetHeaderGuardCPPVariable(filename) - if _root_debug: - sys.stderr.write(f"_root ignore, returning {file_path_from_root}\n") + ifndef = "" + ifndef_linenum = 0 + define = "" + endif = "" + endif_linenum = 0 + for linenum, line in enumerate(raw_lines): + linesplit = line.split() + if len(linesplit) >= 2: + # find the first occurrence of #ifndef and #define, save arg + if not ifndef and linesplit[0] == "#ifndef": + # set ifndef to the header guard presented on the #ifndef line. 
+ ifndef = linesplit[1] + ifndef_linenum = linenum + if not define and linesplit[0] == "#define": + define = linesplit[1] + # find the last occurrence of #endif, save entire line + if line.startswith("#endif"): + endif = line + endif_linenum = linenum + + if not ifndef or not define or ifndef != define: + error( + filename, + 0, + "build/header_guard", + 5, + f"No #ifndef header guard found, suggested CPP variable is: {cppvar}", + ) + return - # --root=FAKE_DIR is ignored - return file_path_from_root + # The guard should be PATH_FILE_H_, but we also allow PATH_FILE_H__ + # for backward compatibility. + if ifndef != cppvar: + error_level = 0 + if ifndef != cppvar + "_": + error_level = 5 + + ParseNolintSuppressions( + filename, raw_lines[ifndef_linenum], ifndef_linenum, error + ) + error( + filename, + ifndef_linenum, + "build/header_guard", + error_level, + f"#ifndef header guard has wrong style, please use: {cppvar}", + ) + + # Check for "//" comments on endif line. + ParseNolintSuppressions(filename, raw_lines[endif_linenum], endif_linenum, error) + match = re.match(r"#endif\s*//\s*" + cppvar + r"(_)?\b", endif) + if match: + if match.group(1) == "_": + # Issue low severity warning for deprecated double trailing underscore + error( + filename, + endif_linenum, + "build/header_guard", + 0, + f'#endif line should be "#endif // {cppvar}"', + ) + return - file_path_from_root = FixupPathFromRoot() - return re.sub(r'[^a-zA-Z0-9]', '_', file_path_from_root).upper() + '_' - - -def CheckForHeaderGuard(filename, clean_lines, error): - """Checks that the file contains a header guard. - - Logs an error if no #ifndef header guard is present. For other - headers, checks that the full pathname is used. - - Args: - filename: The name of the C++ header file. - clean_lines: A CleansedLines instance containing the file. - error: The function to call with any errors found. - """ - - # Don't check for header guards if there are error suppression - # comments somewhere in this file. 
- # - # Because this is silencing a warning for a nonexistent line, we - # only support the very specific NOLINT(build/header_guard) syntax, - # and not the general NOLINT or NOLINT(*) syntax. - raw_lines = clean_lines.lines_without_raw_strings - for i in raw_lines: - if re.search(r'//\s*NOLINT\(build/header_guard\)', i): - return - - # Allow pragma once instead of header guards - for i in raw_lines: - if re.search(r'^\s*#pragma\s+once', i): - return - - cppvar = GetHeaderGuardCPPVariable(filename) - - ifndef = '' - ifndef_linenum = 0 - define = '' - endif = '' - endif_linenum = 0 - for linenum, line in enumerate(raw_lines): - linesplit = line.split() - if len(linesplit) >= 2: - # find the first occurrence of #ifndef and #define, save arg - if not ifndef and linesplit[0] == '#ifndef': - # set ifndef to the header guard presented on the #ifndef line. - ifndef = linesplit[1] - ifndef_linenum = linenum - if not define and linesplit[0] == '#define': - define = linesplit[1] - # find the last occurrence of #endif, save entire line - if line.startswith('#endif'): - endif = line - endif_linenum = linenum - - if not ifndef or not define or ifndef != define: - error(filename, 0, 'build/header_guard', 5, - f'No #ifndef header guard found, suggested CPP variable is: {cppvar}') - return - - # The guard should be PATH_FILE_H_, but we also allow PATH_FILE_H__ - # for backward compatibility. - if ifndef != cppvar: - error_level = 0 - if ifndef != cppvar + '_': - error_level = 5 - - ParseNolintSuppressions(filename, raw_lines[ifndef_linenum], ifndef_linenum, - error) - error(filename, ifndef_linenum, 'build/header_guard', error_level, - f'#ifndef header guard has wrong style, please use: {cppvar}') - - # Check for "//" comments on endif line. 
- ParseNolintSuppressions(filename, raw_lines[endif_linenum], endif_linenum, - error) - match = re.match(r'#endif\s*//\s*' + cppvar + r'(_)?\b', endif) - if match: - if match.group(1) == '_': - # Issue low severity warning for deprecated double trailing underscore - error(filename, endif_linenum, 'build/header_guard', 0, - f'#endif line should be "#endif // {cppvar}"') - return - - # Didn't find the corresponding "//" comment. If this file does not - # contain any "//" comments at all, it could be that the compiler - # only wants "/**/" comments, look for those instead. - no_single_line_comments = True - for i in range(1, len(raw_lines) - 1): - line = raw_lines[i] - if re.match(r'^(?:(?:\'(?:\.|[^\'])*\')|(?:"(?:\.|[^"])*")|[^\'"])*//', line): - no_single_line_comments = False - break - - if no_single_line_comments: - match = re.match(r'#endif\s*/\*\s*' + cppvar + r'(_)?\s*\*/', endif) - if match: - if match.group(1) == '_': - # Low severity warning for double trailing underscore - error(filename, endif_linenum, 'build/header_guard', 0, - f'#endif line should be "#endif /* {cppvar} */"') - return + # Didn't find the corresponding "//" comment. If this file does not + # contain any "//" comments at all, it could be that the compiler + # only wants "/**/" comments, look for those instead. 
+ no_single_line_comments = True + for i in range(1, len(raw_lines) - 1): + line = raw_lines[i] + if re.match(r'^(?:(?:\'(?:\.|[^\'])*\')|(?:"(?:\.|[^"])*")|[^\'"])*//', line): + no_single_line_comments = False + break - # Didn't find anything - error(filename, endif_linenum, 'build/header_guard', 5, - f'#endif line should be "#endif // {cppvar}"') + if no_single_line_comments: + match = re.match(r"#endif\s*/\*\s*" + cppvar + r"(_)?\s*\*/", endif) + if match: + if match.group(1) == "_": + # Low severity warning for double trailing underscore + error( + filename, + endif_linenum, + "build/header_guard", + 0, + f'#endif line should be "#endif /* {cppvar} */"', + ) + return + + # Didn't find anything + error( + filename, + endif_linenum, + "build/header_guard", + 5, + f'#endif line should be "#endif // {cppvar}"', + ) def CheckHeaderFileIncluded(filename, include_state, error): - """Logs an error if a source file does not include its header.""" + """Logs an error if a source file does not include its header.""" - # Do not check test files - fileinfo = FileInfo(filename) - if re.search(_TEST_FILE_SUFFIX, fileinfo.BaseName()): - return + # Do not check test files + fileinfo = FileInfo(filename) + if re.search(_TEST_FILE_SUFFIX, fileinfo.BaseName()): + return - first_include = message = None - basefilename = filename[0:len(filename) - len(fileinfo.Extension())] - for ext in GetHeaderExtensions(): - headerfile = basefilename + '.' 
+ ext - if not os.path.exists(headerfile): - continue - headername = FileInfo(headerfile).RepositoryName() - include_uses_unix_dir_aliases = False - for section_list in include_state.include_list: - for f in section_list: - include_text = f[0] - if "./" in include_text: - include_uses_unix_dir_aliases = True - if headername in include_text or include_text in headername: - return - if not first_include: - first_include = f[1] - - message = f'{fileinfo.RepositoryName()} should include its header file {headername}' - if include_uses_unix_dir_aliases: - message += ". Relative paths like . and .. are not allowed." - - if message: - error(filename, first_include, 'build/include', 5, message) + first_include = message = None + basefilename = filename[0 : len(filename) - len(fileinfo.Extension())] + for ext in GetHeaderExtensions(): + headerfile = basefilename + "." + ext + if not os.path.exists(headerfile): + continue + headername = FileInfo(headerfile).RepositoryName() + include_uses_unix_dir_aliases = False + for section_list in include_state.include_list: + for f in section_list: + include_text = f[0] + if "./" in include_text: + include_uses_unix_dir_aliases = True + if headername in include_text or include_text in headername: + return + if not first_include: + first_include = f[1] + + message = ( + f"{fileinfo.RepositoryName()} should include its header file {headername}" + ) + if include_uses_unix_dir_aliases: + message += ". Relative paths like . and .. are not allowed." + + if message: + error(filename, first_include, "build/include", 5, message) def CheckForBadCharacters(filename, lines, error): - """Logs an error for each line containing bad characters. + """Logs an error for each line containing bad characters. - Two kinds of bad characters: + Two kinds of bad characters: - 1. Unicode replacement characters: These indicate that either the file - contained invalid UTF-8 (likely) or Unicode replacement characters (which - it shouldn't). 
Note that it's possible for this to throw off line - numbering if the invalid UTF-8 occurred adjacent to a newline. + 1. Unicode replacement characters: These indicate that either the file + contained invalid UTF-8 (likely) or Unicode replacement characters (which + it shouldn't). Note that it's possible for this to throw off line + numbering if the invalid UTF-8 occurred adjacent to a newline. - 2. NUL bytes. These are problematic for some tools. + 2. NUL bytes. These are problematic for some tools. - Args: - filename: The name of the current file. - lines: An array of strings, each representing a line of the file. - error: The function to call with any errors found. - """ - for linenum, line in enumerate(lines): - if '\ufffd' in line: - error(filename, linenum, 'readability/utf8', 5, - 'Line contains invalid UTF-8 (or Unicode replacement character).') - if '\0' in line: - error(filename, linenum, 'readability/nul', 5, 'Line contains NUL byte.') + Args: + filename: The name of the current file. + lines: An array of strings, each representing a line of the file. + error: The function to call with any errors found. + """ + for linenum, line in enumerate(lines): + if "\ufffd" in line: + error( + filename, + linenum, + "readability/utf8", + 5, + "Line contains invalid UTF-8 (or Unicode replacement character).", + ) + if "\0" in line: + error(filename, linenum, "readability/nul", 5, "Line contains NUL byte.") def CheckForNewlineAtEOF(filename, lines, error): - """Logs an error if there is no newline char at the end of the file. + """Logs an error if there is no newline char at the end of the file. - Args: - filename: The name of the current file. - lines: An array of strings, each representing a line of the file. - error: The function to call with any errors found. - """ + Args: + filename: The name of the current file. + lines: An array of strings, each representing a line of the file. + error: The function to call with any errors found. 
+ """ - # The array lines() was created by adding two newlines to the - # original file (go figure), then splitting on \n. - # To verify that the file ends in \n, we just have to make sure the - # last-but-two element of lines() exists and is empty. - if len(lines) < 3 or lines[-2]: - error(filename, len(lines) - 2, 'whitespace/ending_newline', 5, - 'Could not find a newline character at the end of the file.') + # The array lines() was created by adding two newlines to the + # original file (go figure), then splitting on \n. + # To verify that the file ends in \n, we just have to make sure the + # last-but-two element of lines() exists and is empty. + if len(lines) < 3 or lines[-2]: + error( + filename, + len(lines) - 2, + "whitespace/ending_newline", + 5, + "Could not find a newline character at the end of the file.", + ) def CheckForMultilineCommentsAndStrings(filename, clean_lines, linenum, error): - """Logs an error if we see /* ... */ or "..." that extend past one line. - - /* ... */ comments are legit inside macros, for one line. - Otherwise, we prefer // comments, so it's ok to warn about the - other. Likewise, it's ok for strings to extend across multiple - lines, as long as a line continuation character (backslash) - terminates each line. Although not currently prohibited by the C++ - style guide, it's ugly and unnecessary. We don't do well with either - in this lint program, so we warn about both. - - Args: - filename: The name of the current file. - clean_lines: A CleansedLines instance containing the file. - linenum: The number of the line to check. - error: The function to call with any errors found. - """ - line = clean_lines.elided[linenum] - - # Remove all \\ (escaped backslashes) from the line. They are OK, and the - # second (escaped) slash may trigger later \" detection erroneously. 
- line = line.replace('\\\\', '') - - if line.count('/*') > line.count('*/'): - error(filename, linenum, 'readability/multiline_comment', 5, - 'Complex multi-line /*...*/-style comment found. ' - 'Lint may give bogus warnings. ' - 'Consider replacing these with //-style comments, ' - 'with #if 0...#endif, ' - 'or with more clearly structured multi-line comments.') - - if (line.count('"') - line.count('\\"')) % 2: - error(filename, linenum, 'readability/multiline_string', 5, - 'Multi-line string ("...") found. This lint script doesn\'t ' - 'do well with such strings, and may give bogus warnings. ' - 'Use C++11 raw strings or concatenation instead.') + """Logs an error if we see /* ... */ or "..." that extend past one line. + + /* ... */ comments are legit inside macros, for one line. + Otherwise, we prefer // comments, so it's ok to warn about the + other. Likewise, it's ok for strings to extend across multiple + lines, as long as a line continuation character (backslash) + terminates each line. Although not currently prohibited by the C++ + style guide, it's ugly and unnecessary. We don't do well with either + in this lint program, so we warn about both. + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + error: The function to call with any errors found. + """ + line = clean_lines.elided[linenum] + + # Remove all \\ (escaped backslashes) from the line. They are OK, and the + # second (escaped) slash may trigger later \" detection erroneously. + line = line.replace("\\\\", "") + + if line.count("/*") > line.count("*/"): + error( + filename, + linenum, + "readability/multiline_comment", + 5, + "Complex multi-line /*...*/-style comment found. " + "Lint may give bogus warnings. 
" + "Consider replacing these with //-style comments, " + "with #if 0...#endif, " + "or with more clearly structured multi-line comments.", + ) + + if (line.count('"') - line.count('\\"')) % 2: + error( + filename, + linenum, + "readability/multiline_string", + 5, + 'Multi-line string ("...") found. This lint script doesn\'t ' + "do well with such strings, and may give bogus warnings. " + "Use C++11 raw strings or concatenation instead.", + ) # (non-threadsafe name, thread-safe alternative, validation pattern) @@ -2710,488 +2896,961 @@ def CheckForMultilineCommentsAndStrings(filename, clean_lines, linenum, error): # in some expression context on the same line by matching on some # operator before the function name. This eliminates constructors and # member function calls. -_UNSAFE_FUNC_PREFIX = r'(?:[-+*/=%^&|(<]\s*|>\s+)' +_UNSAFE_FUNC_PREFIX = r"(?:[-+*/=%^&|(<]\s*|>\s+)" _THREADING_LIST = ( - ('asctime(', 'asctime_r(', _UNSAFE_FUNC_PREFIX + r'asctime\([^)]+\)'), - ('ctime(', 'ctime_r(', _UNSAFE_FUNC_PREFIX + r'ctime\([^)]+\)'), - ('getgrgid(', 'getgrgid_r(', _UNSAFE_FUNC_PREFIX + r'getgrgid\([^)]+\)'), - ('getgrnam(', 'getgrnam_r(', _UNSAFE_FUNC_PREFIX + r'getgrnam\([^)]+\)'), - ('getlogin(', 'getlogin_r(', _UNSAFE_FUNC_PREFIX + r'getlogin\(\)'), - ('getpwnam(', 'getpwnam_r(', _UNSAFE_FUNC_PREFIX + r'getpwnam\([^)]+\)'), - ('getpwuid(', 'getpwuid_r(', _UNSAFE_FUNC_PREFIX + r'getpwuid\([^)]+\)'), - ('gmtime(', 'gmtime_r(', _UNSAFE_FUNC_PREFIX + r'gmtime\([^)]+\)'), - ('localtime(', 'localtime_r(', _UNSAFE_FUNC_PREFIX + r'localtime\([^)]+\)'), - ('rand(', 'rand_r(', _UNSAFE_FUNC_PREFIX + r'rand\(\)'), - ('strtok(', 'strtok_r(', - _UNSAFE_FUNC_PREFIX + r'strtok\([^)]+\)'), - ('ttyname(', 'ttyname_r(', _UNSAFE_FUNC_PREFIX + r'ttyname\([^)]+\)'), - ) + ("asctime(", "asctime_r(", _UNSAFE_FUNC_PREFIX + r"asctime\([^)]+\)"), + ("ctime(", "ctime_r(", _UNSAFE_FUNC_PREFIX + r"ctime\([^)]+\)"), + ("getgrgid(", "getgrgid_r(", _UNSAFE_FUNC_PREFIX + r"getgrgid\([^)]+\)"), + 
("getgrnam(", "getgrnam_r(", _UNSAFE_FUNC_PREFIX + r"getgrnam\([^)]+\)"), + ("getlogin(", "getlogin_r(", _UNSAFE_FUNC_PREFIX + r"getlogin\(\)"), + ("getpwnam(", "getpwnam_r(", _UNSAFE_FUNC_PREFIX + r"getpwnam\([^)]+\)"), + ("getpwuid(", "getpwuid_r(", _UNSAFE_FUNC_PREFIX + r"getpwuid\([^)]+\)"), + ("gmtime(", "gmtime_r(", _UNSAFE_FUNC_PREFIX + r"gmtime\([^)]+\)"), + ("localtime(", "localtime_r(", _UNSAFE_FUNC_PREFIX + r"localtime\([^)]+\)"), + ("rand(", "rand_r(", _UNSAFE_FUNC_PREFIX + r"rand\(\)"), + ("strtok(", "strtok_r(", _UNSAFE_FUNC_PREFIX + r"strtok\([^)]+\)"), + ("ttyname(", "ttyname_r(", _UNSAFE_FUNC_PREFIX + r"ttyname\([^)]+\)"), +) def CheckPosixThreading(filename, clean_lines, linenum, error): - """Checks for calls to thread-unsafe functions. - - Much code has been originally written without consideration of - multi-threading. Also, engineers are relying on their old experience; - they have learned posix before threading extensions were added. These - tests guide the engineers to use thread-safe functions (when using - posix directly). - - Args: - filename: The name of the current file. - clean_lines: A CleansedLines instance containing the file. - linenum: The number of the line to check. - error: The function to call with any errors found. - """ - line = clean_lines.elided[linenum] - for single_thread_func, multithread_safe_func, pattern in _THREADING_LIST: - # Additional pattern matching check to confirm that this is the - # function we are looking for - if re.search(pattern, line): - error(filename, linenum, 'runtime/threadsafe_fn', 2, - 'Consider using ' + multithread_safe_func + - '...) instead of ' + single_thread_func + - '...) for improved thread safety.') + """Checks for calls to thread-unsafe functions. + + Much code has been originally written without consideration of + multi-threading. Also, engineers are relying on their old experience; + they have learned posix before threading extensions were added. 
These + tests guide the engineers to use thread-safe functions (when using + posix directly). + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + error: The function to call with any errors found. + """ + line = clean_lines.elided[linenum] + for single_thread_func, multithread_safe_func, pattern in _THREADING_LIST: + # Additional pattern matching check to confirm that this is the + # function we are looking for + if re.search(pattern, line): + error( + filename, + linenum, + "runtime/threadsafe_fn", + 2, + "Consider using " + + multithread_safe_func + + "...) instead of " + + single_thread_func + + "...) for improved thread safety.", + ) def CheckVlogArguments(filename, clean_lines, linenum, error): - """Checks that VLOG() is only used for defining a logging level. - - For example, VLOG(2) is correct. VLOG(INFO), VLOG(WARNING), VLOG(ERROR), and - VLOG(FATAL) are not. - - Args: - filename: The name of the current file. - clean_lines: A CleansedLines instance containing the file. - linenum: The number of the line to check. - error: The function to call with any errors found. - """ - line = clean_lines.elided[linenum] - if re.search(r'\bVLOG\((INFO|ERROR|WARNING|DFATAL|FATAL)\)', line): - error(filename, linenum, 'runtime/vlog', 5, - 'VLOG() should be used with numeric verbosity level. ' - 'Use LOG() if you want symbolic severity levels.') + """Checks that VLOG() is only used for defining a logging level. + + For example, VLOG(2) is correct. VLOG(INFO), VLOG(WARNING), VLOG(ERROR), and + VLOG(FATAL) are not. + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + error: The function to call with any errors found. 
+ """ + line = clean_lines.elided[linenum] + if re.search(r"\bVLOG\((INFO|ERROR|WARNING|DFATAL|FATAL)\)", line): + error( + filename, + linenum, + "runtime/vlog", + 5, + "VLOG() should be used with numeric verbosity level. " + "Use LOG() if you want symbolic severity levels.", + ) + # Matches invalid increment: *count++, which moves pointer instead of # incrementing a value. -_RE_PATTERN_INVALID_INCREMENT = re.compile( - r'^\s*\*\w+(\+\+|--);') +_RE_PATTERN_INVALID_INCREMENT = re.compile(r"^\s*\*\w+(\+\+|--);") def CheckInvalidIncrement(filename, clean_lines, linenum, error): - """Checks for invalid increment *count++. - - For example following function: - void increment_counter(int* count) { - *count++; - } - is invalid, because it effectively does count++, moving pointer, and should - be replaced with ++*count, (*count)++ or *count += 1. - - Args: - filename: The name of the current file. - clean_lines: A CleansedLines instance containing the file. - linenum: The number of the line to check. - error: The function to call with any errors found. - """ - line = clean_lines.elided[linenum] - if _RE_PATTERN_INVALID_INCREMENT.match(line): - error(filename, linenum, 'runtime/invalid_increment', 5, - 'Changing pointer instead of value (or unused value of operator*).') + """Checks for invalid increment *count++. + + For example following function: + void increment_counter(int* count) { + *count++; + } + is invalid, because it effectively does count++, moving pointer, and should + be replaced with ++*count, (*count)++ or *count += 1. + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + error: The function to call with any errors found. 
+ """ + line = clean_lines.elided[linenum] + if _RE_PATTERN_INVALID_INCREMENT.match(line): + error( + filename, + linenum, + "runtime/invalid_increment", + 5, + "Changing pointer instead of value (or unused value of operator*).", + ) def IsMacroDefinition(clean_lines, linenum): - if re.search(r'^#define', clean_lines[linenum]): - return True + if re.search(r"^#define", clean_lines[linenum]): + return True - if linenum > 0 and re.search(r'\\$', clean_lines[linenum - 1]): - return True + if linenum > 0 and re.search(r"\\$", clean_lines[linenum - 1]): + return True - return False + return False def IsForwardClassDeclaration(clean_lines, linenum): - return re.match(r'^\s*(\btemplate\b)*.*class\s+\w+;\s*$', clean_lines[linenum]) + return re.match(r"^\s*(\btemplate\b)*.*class\s+\w+;\s*$", clean_lines[linenum]) class _BlockInfo(object): - """Stores information about a generic block of code.""" - - def __init__(self, linenum, seen_open_brace): - self.starting_linenum = linenum - self.seen_open_brace = seen_open_brace - self.open_parentheses = 0 - self.inline_asm = _NO_ASM - self.check_namespace_indentation = False - - def CheckBegin(self, filename, clean_lines, linenum, error): - """Run checks that applies to text up to the opening brace. - - This is mostly for checking the text after the class identifier - and the "{", usually where the base class is specified. For other - blocks, there isn't much to check, so we always pass. - - Args: - filename: The name of the current file. - clean_lines: A CleansedLines instance containing the file. - linenum: The number of the line to check. - error: The function to call with any errors found. 
- """ - pass + """Stores information about a generic block of code.""" + + def __init__(self, linenum, seen_open_brace): + self.starting_linenum = linenum + self.seen_open_brace = seen_open_brace + self.open_parentheses = 0 + self.inline_asm = _NO_ASM + self.check_namespace_indentation = False + + def CheckBegin(self, filename, clean_lines, linenum, error): + """Run checks that applies to text up to the opening brace. + + This is mostly for checking the text after the class identifier + and the "{", usually where the base class is specified. For other + blocks, there isn't much to check, so we always pass. + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + error: The function to call with any errors found. + """ + pass - def CheckEnd(self, filename, clean_lines, linenum, error): - """Run checks that applies to text after the closing brace. + def CheckEnd(self, filename, clean_lines, linenum, error): + """Run checks that applies to text after the closing brace. - This is mostly used for checking end of namespace comments. + This is mostly used for checking end of namespace comments. - Args: - filename: The name of the current file. - clean_lines: A CleansedLines instance containing the file. - linenum: The number of the line to check. - error: The function to call with any errors found. - """ - pass + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + error: The function to call with any errors found. + """ + pass - def IsBlockInfo(self): - """Returns true if this block is a _BlockInfo. + def IsBlockInfo(self): + """Returns true if this block is a _BlockInfo. - This is convenient for verifying that an object is an instance of - a _BlockInfo, but not an instance of any of the derived classes. 
+ This is convenient for verifying that an object is an instance of + a _BlockInfo, but not an instance of any of the derived classes. - Returns: - True for this class, False for derived classes. - """ - return self.__class__ == _BlockInfo + Returns: + True for this class, False for derived classes. + """ + return self.__class__ == _BlockInfo class _ExternCInfo(_BlockInfo): - """Stores information about an 'extern "C"' block.""" + """Stores information about an 'extern "C"' block.""" - def __init__(self, linenum): - _BlockInfo.__init__(self, linenum, True) + def __init__(self, linenum): + _BlockInfo.__init__(self, linenum, True) class _ClassInfo(_BlockInfo): - """Stores information about a class.""" - - def __init__(self, name, class_or_struct, clean_lines, linenum): - _BlockInfo.__init__(self, linenum, False) - self.name = name - self.is_derived = False - self.check_namespace_indentation = True - if class_or_struct == 'struct': - self.access = 'public' - self.is_struct = True - else: - self.access = 'private' - self.is_struct = False + """Stores information about a class.""" + + def __init__(self, name, class_or_struct, clean_lines, linenum): + _BlockInfo.__init__(self, linenum, False) + self.name = name + self.is_derived = False + self.check_namespace_indentation = True + if class_or_struct == "struct": + self.access = "public" + self.is_struct = True + else: + self.access = "private" + self.is_struct = False - # Remember initial indentation level for this class. Using raw_lines here - # instead of elided to account for leading comments. - self.class_indent = GetIndentLevel(clean_lines.raw_lines[linenum]) + # Remember initial indentation level for this class. Using raw_lines here + # instead of elided to account for leading comments. + self.class_indent = GetIndentLevel(clean_lines.raw_lines[linenum]) - # Try to find the end of the class. This will be confused by things like: - # class A { - # } *x = { ... 
- # - # But it's still good enough for CheckSectionSpacing. - self.last_line = 0 - depth = 0 - for i in range(linenum, clean_lines.NumLines()): - line = clean_lines.elided[i] - depth += line.count('{') - line.count('}') - if not depth: - self.last_line = i - break - - def CheckBegin(self, filename, clean_lines, linenum, error): - # Look for a bare ':' - if re.search('(^|[^:]):($|[^:])', clean_lines.elided[linenum]): - self.is_derived = True - - def CheckEnd(self, filename, clean_lines, linenum, error): - # If there is a DISALLOW macro, it should appear near the end of - # the class. - seen_last_thing_in_class = False - for i in range(linenum - 1, self.starting_linenum, -1): - match = re.search( - r'\b(DISALLOW_COPY_AND_ASSIGN|DISALLOW_IMPLICIT_CONSTRUCTORS)\(' + - self.name + r'\)', - clean_lines.elided[i]) - if match: - if seen_last_thing_in_class: - error(filename, i, 'readability/constructors', 3, - match.group(1) + ' should be the last thing in the class') - break - - if not re.match(r'^\s*$', clean_lines.elided[i]): - seen_last_thing_in_class = True - - # Check that closing brace is aligned with beginning of the class. - # Only do this if the closing brace is indented by only whitespaces. - # This means we will not check single-line class definitions. - indent = re.match(r'^( *)\}', clean_lines.elided[linenum]) - if indent and len(indent.group(1)) != self.class_indent: - if self.is_struct: - parent = 'struct ' + self.name - else: - parent = 'class ' + self.name - error(filename, linenum, 'whitespace/indent', 3, - f'Closing brace should be aligned with beginning of {parent}') + # Try to find the end of the class. This will be confused by things like: + # class A { + # } *x = { ... + # + # But it's still good enough for CheckSectionSpacing. 
+ self.last_line = 0 + depth = 0 + for i in range(linenum, clean_lines.NumLines()): + line = clean_lines.elided[i] + depth += line.count("{") - line.count("}") + if not depth: + self.last_line = i + break + + def CheckBegin(self, filename, clean_lines, linenum, error): + # Look for a bare ':' + if re.search("(^|[^:]):($|[^:])", clean_lines.elided[linenum]): + self.is_derived = True + + def CheckEnd(self, filename, clean_lines, linenum, error): + # If there is a DISALLOW macro, it should appear near the end of + # the class. + seen_last_thing_in_class = False + for i in range(linenum - 1, self.starting_linenum, -1): + match = re.search( + r"\b(DISALLOW_COPY_AND_ASSIGN|DISALLOW_IMPLICIT_CONSTRUCTORS)\(" + + self.name + + r"\)", + clean_lines.elided[i], + ) + if match: + if seen_last_thing_in_class: + error( + filename, + i, + "readability/constructors", + 3, + match.group(1) + " should be the last thing in the class", + ) + break + + if not re.match(r"^\s*$", clean_lines.elided[i]): + seen_last_thing_in_class = True + + # Check that closing brace is aligned with beginning of the class. + # Only do this if the closing brace is indented by only whitespaces. + # This means we will not check single-line class definitions. 
+ indent = re.match(r"^( *)\}", clean_lines.elided[linenum]) + if indent and len(indent.group(1)) != self.class_indent: + if self.is_struct: + parent = "struct " + self.name + else: + parent = "class " + self.name + error( + filename, + linenum, + "whitespace/indent", + 3, + f"Closing brace should be aligned with beginning of {parent}", + ) class _NamespaceInfo(_BlockInfo): - """Stores information about a namespace.""" + """Stores information about a namespace.""" - def __init__(self, name, linenum): - _BlockInfo.__init__(self, linenum, False) - self.name = name or '' - self.check_namespace_indentation = True + def __init__(self, name, linenum): + _BlockInfo.__init__(self, linenum, False) + self.name = name or "" + self.check_namespace_indentation = True - def CheckEnd(self, filename, clean_lines, linenum, error): - """Check end of namespace comments.""" - line = clean_lines.raw_lines[linenum] + def CheckEnd(self, filename, clean_lines, linenum, error): + """Check end of namespace comments.""" + line = clean_lines.raw_lines[linenum] - # Check how many lines is enclosed in this namespace. Don't issue - # warning for missing namespace comments if there aren't enough - # lines. However, do apply checks if there is already an end of - # namespace comment and it's incorrect. - # - # TODO(unknown): We always want to check end of namespace comments - # if a namespace is large, but sometimes we also want to apply the - # check if a short namespace contained nontrivial things (something - # other than forward declarations). There is currently no logic on - # deciding what these nontrivial things are, so this check is - # triggered by namespace size only, which works most of the time. - if (linenum - self.starting_linenum < 10 - and not re.match(r'^\s*};*\s*(//|/\*).*\bnamespace\b', line)): - return - - # Look for matching comment at end of namespace. 
- # - # Note that we accept C style "/* */" comments for terminating - # namespaces, so that code that terminate namespaces inside - # preprocessor macros can be cpplint clean. - # - # We also accept stuff like "// end of namespace ." with the - # period at the end. - # - # Besides these, we don't accept anything else, otherwise we might - # get false negatives when existing comment is a substring of the - # expected namespace. - if self.name: - # Named namespace - if not re.match((r'^\s*};*\s*(//|/\*).*\bnamespace\s+' + - re.escape(self.name) + r'[\*/\.\\\s]*$'), - line): - error(filename, linenum, 'readability/namespace', 5, - f'Namespace should be terminated with "// namespace {self.name}"') - else: - # Anonymous namespace - if not re.match(r'^\s*};*\s*(//|/\*).*\bnamespace[\*/\.\\\s]*$', line): - # If "// namespace anonymous" or "// anonymous namespace (more text)", - # mention "// anonymous namespace" as an acceptable form - if re.match(r'^\s*}.*\b(namespace anonymous|anonymous namespace)\b', line): - error(filename, linenum, 'readability/namespace', 5, - 'Anonymous namespace should be terminated with "// namespace"' - ' or "// anonymous namespace"') + # Check how many lines is enclosed in this namespace. Don't issue + # warning for missing namespace comments if there aren't enough + # lines. However, do apply checks if there is already an end of + # namespace comment and it's incorrect. + # + # TODO(unknown): We always want to check end of namespace comments + # if a namespace is large, but sometimes we also want to apply the + # check if a short namespace contained nontrivial things (something + # other than forward declarations). There is currently no logic on + # deciding what these nontrivial things are, so this check is + # triggered by namespace size only, which works most of the time. 
+ if linenum - self.starting_linenum < 10 and not re.match( + r"^\s*};*\s*(//|/\*).*\bnamespace\b", line + ): + return + + # Look for matching comment at end of namespace. + # + # Note that we accept C style "/* */" comments for terminating + # namespaces, so that code that terminate namespaces inside + # preprocessor macros can be cpplint clean. + # + # We also accept stuff like "// end of namespace ." with the + # period at the end. + # + # Besides these, we don't accept anything else, otherwise we might + # get false negatives when existing comment is a substring of the + # expected namespace. + if self.name: + # Named namespace + if not re.match( + ( + r"^\s*};*\s*(//|/\*).*\bnamespace\s+" + + re.escape(self.name) + + r"[\*/\.\\\s]*$" + ), + line, + ): + error( + filename, + linenum, + "readability/namespace", + 5, + f'Namespace should be terminated with "// namespace {self.name}"', + ) else: - error(filename, linenum, 'readability/namespace', 5, - 'Anonymous namespace should be terminated with "// namespace"') + # Anonymous namespace + if not re.match(r"^\s*};*\s*(//|/\*).*\bnamespace[\*/\.\\\s]*$", line): + # If "// namespace anonymous" or "// anonymous namespace (more text)", + # mention "// anonymous namespace" as an acceptable form + if re.match( + r"^\s*}.*\b(namespace anonymous|anonymous namespace)\b", line + ): + error( + filename, + linenum, + "readability/namespace", + 5, + 'Anonymous namespace should be terminated with "// namespace"' + ' or "// anonymous namespace"', + ) + else: + error( + filename, + linenum, + "readability/namespace", + 5, + 'Anonymous namespace should be terminated with "// namespace"', + ) class _PreprocessorInfo(object): - """Stores checkpoints of nesting stacks when #if/#else is seen.""" + """Stores checkpoints of nesting stacks when #if/#else is seen.""" - def __init__(self, stack_before_if): - # The entire nesting stack before #if - self.stack_before_if = stack_before_if + def __init__(self, stack_before_if): + # The entire 
nesting stack before #if + self.stack_before_if = stack_before_if - # The entire nesting stack up to #else - self.stack_before_else = [] + # The entire nesting stack up to #else + self.stack_before_else = [] - # Whether we have already seen #else or #elif - self.seen_else = False + # Whether we have already seen #else or #elif + self.seen_else = False class NestingState(object): - """Holds states related to parsing braces.""" - - def __init__(self): - # Stack for tracking all braces. An object is pushed whenever we - # see a "{", and popped when we see a "}". Only 3 types of - # objects are possible: - # - _ClassInfo: a class or struct. - # - _NamespaceInfo: a namespace. - # - _BlockInfo: some other type of block. - self.stack = [] - - # Top of the previous stack before each Update(). - # - # Because the nesting_stack is updated at the end of each line, we - # had to do some convoluted checks to find out what is the current - # scope at the beginning of the line. This check is simplified by - # saving the previous top of nesting stack. - # - # We could save the full stack, but we only need the top. Copying - # the full nesting stack would slow down cpplint by ~10%. - self.previous_stack_top = [] - - # Stack of _PreprocessorInfo objects. - self.pp_stack = [] - - def SeenOpenBrace(self): - """Check if we have seen the opening brace for the innermost block. - - Returns: - True if we have seen the opening brace, False if the innermost - block is still expecting an opening brace. - """ - return (not self.stack) or self.stack[-1].seen_open_brace - - def InNamespaceBody(self): - """Check if we are currently one level inside a namespace body. + """Holds states related to parsing braces.""" + + def __init__(self): + # Stack for tracking all braces. An object is pushed whenever we + # see a "{", and popped when we see a "}". Only 3 types of + # objects are possible: + # - _ClassInfo: a class or struct. + # - _NamespaceInfo: a namespace. 
+ # - _BlockInfo: some other type of block. + self.stack = [] + + # Top of the previous stack before each Update(). + # + # Because the nesting_stack is updated at the end of each line, we + # had to do some convoluted checks to find out what is the current + # scope at the beginning of the line. This check is simplified by + # saving the previous top of nesting stack. + # + # We could save the full stack, but we only need the top. Copying + # the full nesting stack would slow down cpplint by ~10%. + self.previous_stack_top = [] + + # Stack of _PreprocessorInfo objects. + self.pp_stack = [] + + def SeenOpenBrace(self): + """Check if we have seen the opening brace for the innermost block. + + Returns: + True if we have seen the opening brace, False if the innermost + block is still expecting an opening brace. + """ + return (not self.stack) or self.stack[-1].seen_open_brace + + def InNamespaceBody(self): + """Check if we are currently one level inside a namespace body. + + Returns: + True if top of the stack is a namespace block, False otherwise. + """ + return self.stack and isinstance(self.stack[-1], _NamespaceInfo) + + def InExternC(self): + """Check if we are currently one level inside an 'extern "C"' block. + + Returns: + True if top of the stack is an extern block, False otherwise. + """ + return self.stack and isinstance(self.stack[-1], _ExternCInfo) + + def InClassDeclaration(self): + """Check if we are currently one level inside a class or struct declaration. + + Returns: + True if top of the stack is a class/struct, False otherwise. + """ + return self.stack and isinstance(self.stack[-1], _ClassInfo) + + def InAsmBlock(self): + """Check if we are currently one level inside an inline ASM block. + + Returns: + True if the top of the stack is a block containing inline ASM. 
+ """ + return self.stack and self.stack[-1].inline_asm != _NO_ASM + + def InTemplateArgumentList(self, clean_lines, linenum, pos): + """Check if current position is inside template argument list. + + Args: + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + pos: position just after the suspected template argument. + Returns: + True if (linenum, pos) is inside template arguments. + """ + while linenum < clean_lines.NumLines(): + # Find the earliest character that might indicate a template argument + line = clean_lines.elided[linenum] + match = re.match(r"^[^{};=\[\]\.<>]*(.)", line[pos:]) + if not match: + linenum += 1 + pos = 0 + continue + token = match.group(1) + pos += len(match.group(0)) + + # These things do not look like template argument list: + # class Suspect { + # class Suspect x; } + if token in ("{", "}", ";"): + return False - Returns: - True if top of the stack is a namespace block, False otherwise. - """ - return self.stack and isinstance(self.stack[-1], _NamespaceInfo) + # These things look like template argument list: + # template + # template + # template + # template + if token in (">", "=", "[", "]", "."): + return True + + # Check if token is an unmatched '<'. + # If not, move on to the next character. + if token != "<": + pos += 1 + if pos >= len(line): + linenum += 1 + pos = 0 + continue + + # We can't be sure if we just find a single '<', and need to + # find the matching '>'. + (_, end_line, end_pos) = CloseExpression(clean_lines, linenum, pos - 1) + if end_pos < 0: + # Not sure if template argument list or syntax error in file + return False + linenum = end_line + pos = end_pos + return False - def InExternC(self): - """Check if we are currently one level inside an 'extern "C"' block. + def UpdatePreprocessor(self, line): + """Update preprocessor stack. 
+ + We need to handle preprocessors due to classes like this: + #ifdef SWIG + struct ResultDetailsPageElementExtensionPoint { + #else + struct ResultDetailsPageElementExtensionPoint : public Extension { + #endif + + We make the following assumptions (good enough for most files): + - Preprocessor condition evaluates to true from #if up to first + #else/#elif/#endif. + + - Preprocessor condition evaluates to false from #else/#elif up + to #endif. We still perform lint checks on these lines, but + these do not affect nesting stack. + + Args: + line: current line to check. + """ + if re.match(r"^\s*#\s*(if|ifdef|ifndef)\b", line): + # Beginning of #if block, save the nesting stack here. The saved + # stack will allow us to restore the parsing state in the #else case. + self.pp_stack.append(_PreprocessorInfo(copy.deepcopy(self.stack))) + elif re.match(r"^\s*#\s*(else|elif)\b", line): + # Beginning of #else block + if self.pp_stack: + if not self.pp_stack[-1].seen_else: + # This is the first #else or #elif block. Remember the + # whole nesting stack up to this point. This is what we + # keep after the #endif. + self.pp_stack[-1].seen_else = True + self.pp_stack[-1].stack_before_else = copy.deepcopy(self.stack) + + # Restore the stack to how it was before the #if + self.stack = copy.deepcopy(self.pp_stack[-1].stack_before_if) + else: + # TODO(unknown): unexpected #else, issue warning? + pass + elif re.match(r"^\s*#\s*endif\b", line): + # End of #if or #else blocks. + if self.pp_stack: + # If we saw an #else, we will need to restore the nesting + # stack to its former state before the #else, otherwise we + # will just continue from where we left off. + if self.pp_stack[-1].seen_else: + # Here we can just use a shallow copy since we are the last + # reference to it. + self.stack = self.pp_stack[-1].stack_before_else + # Drop the corresponding #if + self.pp_stack.pop() + else: + # TODO(unknown): unexpected #endif, issue warning? 
+ pass + + # TODO(unknown): Update() is too long, but we will refactor later. + def Update(self, filename, clean_lines, linenum, error): + """Update nesting state with current line. + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + error: The function to call with any errors found. + """ + line = clean_lines.elided[linenum] + + # Remember top of the previous nesting stack. + # + # The stack is always pushed/popped and not modified in place, so + # we can just do a shallow copy instead of copy.deepcopy. Using + # deepcopy would slow down cpplint by ~28%. + if self.stack: + self.previous_stack_top = self.stack[-1] + else: + self.previous_stack_top = None - Returns: - True if top of the stack is an extern block, False otherwise. - """ - return self.stack and isinstance(self.stack[-1], _ExternCInfo) + # Update pp_stack + self.UpdatePreprocessor(line) - def InClassDeclaration(self): - """Check if we are currently one level inside a class or struct declaration. + # Count parentheses. This is to avoid adding struct arguments to + # the nesting stack. + if self.stack: + inner_block = self.stack[-1] + depth_change = line.count("(") - line.count(")") + inner_block.open_parentheses += depth_change + + # Also check if we are starting or ending an inline assembly block. + if inner_block.inline_asm in (_NO_ASM, _END_ASM): + if ( + depth_change != 0 + and inner_block.open_parentheses == 1 + and _MATCH_ASM.match(line) + ): + # Enter assembly block + inner_block.inline_asm = _INSIDE_ASM + else: + # Not entering assembly block. If previous line was _END_ASM, + # we will now shift to _NO_ASM state. + inner_block.inline_asm = _NO_ASM + elif ( + inner_block.inline_asm == _INSIDE_ASM + and inner_block.open_parentheses == 0 + ): + # Exit assembly block + inner_block.inline_asm = _END_ASM + + # Consume namespace declaration at the beginning of the line. 
Do + # this in a loop so that we catch same line declarations like this: + # namespace proto2 { namespace bridge { class MessageSet; } } + while True: + # Match start of namespace. The "\b\s*" below catches namespace + # declarations even if it weren't followed by a whitespace, this + # is so that we don't confuse our namespace checker. The + # missing spaces will be flagged by CheckSpacing. + namespace_decl_match = re.match(r"^\s*namespace\b\s*([:\w]+)?(.*)$", line) + if not namespace_decl_match: + break + + new_namespace = _NamespaceInfo(namespace_decl_match.group(1), linenum) + self.stack.append(new_namespace) + + line = namespace_decl_match.group(2) + if line.find("{") != -1: + new_namespace.seen_open_brace = True + line = line[line.find("{") + 1 :] + + # Look for a class declaration in whatever is left of the line + # after parsing namespaces. The regexp accounts for decorated classes + # such as in: + # class LOCKABLE API Object { + # }; + class_decl_match = re.match( + r"^(\s*(?:template\s*<[\w\s<>,:=]*>\s*)?" + r"(class|struct)\s+(?:[a-zA-Z0-9_]+\s+)*(\w+(?:::\w+)*))" + r"(.*)$", + line, + ) + if class_decl_match and ( + not self.stack or self.stack[-1].open_parentheses == 0 + ): + # We do not want to accept classes that are actually template arguments: + # template , + # template class Ignore3> + # void Function() {}; + # + # To avoid template argument cases, we scan forward and look for + # an unmatched '>'. If we see one, assume we are inside a + # template argument list. + end_declaration = len(class_decl_match.group(1)) + if not self.InTemplateArgumentList(clean_lines, linenum, end_declaration): + self.stack.append( + _ClassInfo( + class_decl_match.group(3), + class_decl_match.group(2), + clean_lines, + linenum, + ) + ) + line = class_decl_match.group(4) + + # If we have not yet seen the opening brace for the innermost block, + # run checks here. 
+ if not self.SeenOpenBrace(): + self.stack[-1].CheckBegin(filename, clean_lines, linenum, error) + + # Update access control if we are inside a class/struct + if self.stack and isinstance(self.stack[-1], _ClassInfo): + classinfo = self.stack[-1] + access_match = re.match( + r"^(.*)\b(public|private|protected|signals)(\s+(?:slots\s*)?)?" + r":(?:[^:]|$)", + line, + ) + if access_match: + classinfo.access = access_match.group(2) + + # Check that access keywords are indented +1 space. Skip this + # check if the keywords are not preceded by whitespaces. + indent = access_match.group(1) + if len(indent) != classinfo.class_indent + 1 and re.match( + r"^\s*$", indent + ): + if classinfo.is_struct: + parent = "struct " + classinfo.name + else: + parent = "class " + classinfo.name + slots = "" + if access_match.group(3): + slots = access_match.group(3) + error( + filename, + linenum, + "whitespace/indent", + 3, + f"{access_match.group(2)}{slots}:" + f" should be indented +1 space inside {parent}", + ) + + # Consume braces or semicolons from what's left of the line + while True: + # Match first brace, semicolon, or closed parenthesis. + matched = re.match(r"^[^{;)}]*([{;)}])(.*)$", line) + if not matched: + break + + token = matched.group(1) + if token == "{": + # If namespace or class hasn't seen a opening brace yet, mark + # namespace/class head as complete. Push a new block onto the + # stack otherwise. + if not self.SeenOpenBrace(): + self.stack[-1].seen_open_brace = True + elif re.match(r'^extern\s*"[^"]*"\s*\{', line): + self.stack.append(_ExternCInfo(linenum)) + else: + self.stack.append(_BlockInfo(linenum, True)) + if _MATCH_ASM.match(line): + self.stack[-1].inline_asm = _BLOCK_ASM + + elif token == ";" or token == ")": + # If we haven't seen an opening brace yet, but we already saw + # a semicolon, this is probably a forward declaration. Pop + # the stack for these. 
+ # + # Similarly, if we haven't seen an opening brace yet, but we + # already saw a closing parenthesis, then these are probably + # function arguments with extra "class" or "struct" keywords. + # Also pop these stack for these. + if not self.SeenOpenBrace(): + self.stack.pop() + else: # token == '}' + # Perform end of block checks and pop the stack. + if self.stack: + self.stack[-1].CheckEnd(filename, clean_lines, linenum, error) + self.stack.pop() + line = matched.group(2) + + def InnermostClass(self): + """Get class info on the top of the stack. + + Returns: + A _ClassInfo object if we are inside a class, or None otherwise. + """ + for i in range(len(self.stack), 0, -1): + classinfo = self.stack[i - 1] + if isinstance(classinfo, _ClassInfo): + return classinfo + return None - Returns: - True if top of the stack is a class/struct, False otherwise. - """ - return self.stack and isinstance(self.stack[-1], _ClassInfo) - def InAsmBlock(self): - """Check if we are currently one level inside an inline ASM block. +def CheckForNonStandardConstructs(filename, clean_lines, linenum, nesting_state, error): + r"""Logs an error if we see certain non-ANSI constructs ignored by gcc-2. - Returns: - True if the top of the stack is a block containing inline ASM. - """ - return self.stack and self.stack[-1].inline_asm != _NO_ASM + Complain about several constructs which gcc-2 accepts, but which are + not standard C++. Warning about these in lint is one way to ease the + transition to new compilers. + - put storage class first (e.g. "static const" instead of "const static"). + - "%lld" instead of %qd" in printf-type functions. + - "%1$d" is non-standard in printf-type functions. + - "\%" is an undefined character escape sequence. + - text after #endif is not allowed. + - invalid inner-style forward declaration. + - >? 
and ?= and ]*(.)', line[pos:]) - if not match: - linenum += 1 - pos = 0 - continue - token = match.group(1) - pos += len(match.group(0)) - - # These things do not look like template argument list: - # class Suspect { - # class Suspect x; } - if token in ('{', '}', ';'): return False - - # These things look like template argument list: - # template - # template - # template - # template - if token in ('>', '=', '[', ']', '.'): return True - - # Check if token is an unmatched '<'. - # If not, move on to the next character. - if token != '<': - pos += 1 - if pos >= len(line): - linenum += 1 - pos = 0 - continue - - # We can't be sure if we just find a single '<', and need to - # find the matching '>'. - (_, end_line, end_pos) = CloseExpression(clean_lines, linenum, pos - 1) - if end_pos < 0: - # Not sure if template argument list or syntax error in file - return False - linenum = end_line - pos = end_pos - return False - def UpdatePreprocessor(self, line): - """Update preprocessor stack. + # Remove comments from the line, but leave in strings for now. + line = clean_lines.lines[linenum] + + if re.search(r'printf\s*\(.*".*%[-+ ]?\d*q', line): + error( + filename, + linenum, + "runtime/printf_format", + 3, + "%q in format strings is deprecated. Use %ll instead.", + ) + + if re.search(r'printf\s*\(.*".*%\d+\$', line): + error( + filename, + linenum, + "runtime/printf_format", + 2, + "%N$ formats are unconventional. Try rewriting to avoid them.", + ) + + # Remove escaped backslashes before looking for undefined escapes. + line = line.replace("\\\\", "") + + if re.search(r'("|\').*\\(%|\[|\(|{)', line): + error( + filename, + linenum, + "build/printf_format", + 3, + "%, [, (, and { are undefined character escapes. Unescape them.", + ) + + # For the rest, work with both comments and strings removed. 
+ line = clean_lines.elided[linenum] + + if re.search( + r"\b(const|volatile|void|char|short|int|long" + r"|float|double|signed|unsigned" + r"|schar|u?int8_t|u?int16_t|u?int32_t|u?int64_t)" + r"\s+(register|static|extern|typedef)\b", + line, + ): + error( + filename, + linenum, + "build/storage_class", + 5, + "Storage-class specifier (static, extern, typedef, etc) should be " + "at the beginning of the declaration.", + ) + + if re.match(r"\s*#\s*endif\s*[^/\s]+", line): + error( + filename, + linenum, + "build/endif_comment", + 5, + "Uncommented text after #endif is non-standard. Use a comment.", + ) + + if re.match(r"\s*class\s+(\w+\s*::\s*)+\w+\s*;", line): + error( + filename, + linenum, + "build/forward_decl", + 5, + "Inner-style forward declarations are invalid. Remove this line.", + ) + + if re.search(r"(\w+|[+-]?\d+(\.\d*)?)\s*(<|>)\?=?\s*(\w+|[+-]?\d+)(\.\d*)?", line): + error( + filename, + linenum, + "build/deprecated", + 3, + ">? and ))?' + # r'\s*const\s*' + type_name + '\s*&\s*\w+\s*;' + error( + filename, + linenum, + "runtime/member_string_references", + 2, + "const string& members are dangerous. It is much better to use " + "alternatives, such as pointers or simple constants.", + ) + + # Everything else in this function operates on class declarations. + # Return early if the top of the nesting stack is not a class, or if + # the class head is not completed yet. + classinfo = nesting_state.InnermostClass() + if not classinfo or not classinfo.seen_open_brace: + return - We need to handle preprocessors due to classes like this: - #ifdef SWIG - struct ResultDetailsPageElementExtensionPoint { - #else - struct ResultDetailsPageElementExtensionPoint : public Extension { - #endif + # The class may have been declared with namespace or classname qualifiers. + # The constructor and destructor will not have those qualifiers. + base_classname = classinfo.name.split("::")[-1] + + # Look for single-argument constructors that aren't marked explicit. 
+ # Technically a valid construct, but against style. + explicit_constructor_match = re.match( + r"\s+(?:(?:inline|constexpr)\s+)*(explicit\s+)?" + rf"(?:(?:inline|constexpr)\s+)*{re.escape(base_classname)}\s*" + r"\(((?:[^()]|\([^()]*\))*)\)", + line, + ) - We make the following assumptions (good enough for most files): - - Preprocessor condition evaluates to true from #if up to first - #else/#elif/#endif. + if explicit_constructor_match: + is_marked_explicit = explicit_constructor_match.group(1) - - Preprocessor condition evaluates to false from #else/#elif up - to #endif. We still perform lint checks on these lines, but - these do not affect nesting stack. + if not explicit_constructor_match.group(2): + constructor_args = [] + else: + constructor_args = explicit_constructor_match.group(2).split(",") + + # collapse arguments so that commas in template parameter lists and function + # argument parameter lists don't split arguments in two + i = 0 + while i < len(constructor_args): + constructor_arg = constructor_args[i] + while constructor_arg.count("<") > constructor_arg.count( + ">" + ) or constructor_arg.count("(") > constructor_arg.count(")"): + constructor_arg += "," + constructor_args[i + 1] + del constructor_args[i + 1] + constructor_args[i] = constructor_arg + i += 1 + + variadic_args = [arg for arg in constructor_args if "&&..." 
in arg] + defaulted_args = [arg for arg in constructor_args if "=" in arg] + noarg_constructor = ( + not constructor_args # empty arg list + or + # 'void' arg specifier + (len(constructor_args) == 1 and constructor_args[0].strip() == "void") + ) + onearg_constructor = ( + (len(constructor_args) == 1 and not noarg_constructor) # exactly one arg + or + # all but at most one arg defaulted + ( + len(constructor_args) >= 1 + and not noarg_constructor + and len(defaulted_args) >= len(constructor_args) - 1 + ) + or + # variadic arguments with zero or one argument + (len(constructor_args) <= 2 and len(variadic_args) >= 1) + ) + initializer_list_constructor = bool( + onearg_constructor + and re.search(r"\bstd\s*::\s*initializer_list\b", constructor_args[0]) + ) + copy_constructor = bool( + onearg_constructor + and re.match( + r"((const\s+(volatile\s+)?)?|(volatile\s+(const\s+)?))?" + rf"{re.escape(base_classname)}(\s*<[^>]*>)?(\s+const)?\s*(?:<\w+>\s*)?&", + constructor_args[0].strip(), + ) + ) + + if ( + not is_marked_explicit + and onearg_constructor + and not initializer_list_constructor + and not copy_constructor + ): + if defaulted_args or variadic_args: + error( + filename, + linenum, + "runtime/explicit", + 4, + "Constructors callable with one argument " + "should be marked explicit.", + ) + else: + error( + filename, + linenum, + "runtime/explicit", + 4, + "Single-parameter constructors should be marked explicit.", + ) - Args: - line: current line to check. - """ - if re.match(r'^\s*#\s*(if|ifdef|ifndef)\b', line): - # Beginning of #if block, save the nesting stack here. The saved - # stack will allow us to restore the parsing state in the #else case. - self.pp_stack.append(_PreprocessorInfo(copy.deepcopy(self.stack))) - elif re.match(r'^\s*#\s*(else|elif)\b', line): - # Beginning of #else block - if self.pp_stack: - if not self.pp_stack[-1].seen_else: - # This is the first #else or #elif block. Remember the - # whole nesting stack up to this point. 
This is what we - # keep after the #endif. - self.pp_stack[-1].seen_else = True - self.pp_stack[-1].stack_before_else = copy.deepcopy(self.stack) - - # Restore the stack to how it was before the #if - self.stack = copy.deepcopy(self.pp_stack[-1].stack_before_if) - else: - # TODO(unknown): unexpected #else, issue warning? - pass - elif re.match(r'^\s*#\s*endif\b', line): - # End of #if or #else blocks. - if self.pp_stack: - # If we saw an #else, we will need to restore the nesting - # stack to its former state before the #else, otherwise we - # will just continue from where we left off. - if self.pp_stack[-1].seen_else: - # Here we can just use a shallow copy since we are the last - # reference to it. - self.stack = self.pp_stack[-1].stack_before_else - # Drop the corresponding #if - self.pp_stack.pop() - else: - # TODO(unknown): unexpected #endif, issue warning? - pass - # TODO(unknown): Update() is too long, but we will refactor later. - def Update(self, filename, clean_lines, linenum, error): - """Update nesting state with current line. +def CheckSpacingForFunctionCall(filename, clean_lines, linenum, error): + """Checks for the correctness of various spacing around function calls. Args: filename: The name of the current file. @@ -3201,1839 +3860,1797 @@ def Update(self, filename, clean_lines, linenum, error): """ line = clean_lines.elided[linenum] - # Remember top of the previous nesting stack. - # - # The stack is always pushed/popped and not modified in place, so - # we can just do a shallow copy instead of copy.deepcopy. Using - # deepcopy would slow down cpplint by ~28%. - if self.stack: - self.previous_stack_top = self.stack[-1] - else: - self.previous_stack_top = None - - # Update pp_stack - self.UpdatePreprocessor(line) - - # Count parentheses. This is to avoid adding struct arguments to - # the nesting stack. 
- if self.stack: - inner_block = self.stack[-1] - depth_change = line.count('(') - line.count(')') - inner_block.open_parentheses += depth_change - - # Also check if we are starting or ending an inline assembly block. - if inner_block.inline_asm in (_NO_ASM, _END_ASM): - if (depth_change != 0 and - inner_block.open_parentheses == 1 and - _MATCH_ASM.match(line)): - # Enter assembly block - inner_block.inline_asm = _INSIDE_ASM - else: - # Not entering assembly block. If previous line was _END_ASM, - # we will now shift to _NO_ASM state. - inner_block.inline_asm = _NO_ASM - elif (inner_block.inline_asm == _INSIDE_ASM and - inner_block.open_parentheses == 0): - # Exit assembly block - inner_block.inline_asm = _END_ASM - - # Consume namespace declaration at the beginning of the line. Do - # this in a loop so that we catch same line declarations like this: - # namespace proto2 { namespace bridge { class MessageSet; } } - while True: - # Match start of namespace. The "\b\s*" below catches namespace - # declarations even if it weren't followed by a whitespace, this - # is so that we don't confuse our namespace checker. The - # missing spaces will be flagged by CheckSpacing. - namespace_decl_match = re.match(r'^\s*namespace\b\s*([:\w]+)?(.*)$', line) - if not namespace_decl_match: - break - - new_namespace = _NamespaceInfo(namespace_decl_match.group(1), linenum) - self.stack.append(new_namespace) - - line = namespace_decl_match.group(2) - if line.find('{') != -1: - new_namespace.seen_open_brace = True - line = line[line.find('{') + 1:] - - # Look for a class declaration in whatever is left of the line - # after parsing namespaces. The regexp accounts for decorated classes - # such as in: - # class LOCKABLE API Object { - # }; - class_decl_match = re.match( - r'^(\s*(?:template\s*<[\w\s<>,:=]*>\s*)?' 
- r'(class|struct)\s+(?:[a-zA-Z0-9_]+\s+)*(\w+(?:::\w+)*))' - r'(.*)$', line) - if (class_decl_match and - (not self.stack or self.stack[-1].open_parentheses == 0)): - # We do not want to accept classes that are actually template arguments: - # template , - # template class Ignore3> - # void Function() {}; - # - # To avoid template argument cases, we scan forward and look for - # an unmatched '>'. If we see one, assume we are inside a - # template argument list. - end_declaration = len(class_decl_match.group(1)) - if not self.InTemplateArgumentList(clean_lines, linenum, end_declaration): - self.stack.append(_ClassInfo( - class_decl_match.group(3), class_decl_match.group(2), - clean_lines, linenum)) - line = class_decl_match.group(4) - - # If we have not yet seen the opening brace for the innermost block, - # run checks here. - if not self.SeenOpenBrace(): - self.stack[-1].CheckBegin(filename, clean_lines, linenum, error) - - # Update access control if we are inside a class/struct - if self.stack and isinstance(self.stack[-1], _ClassInfo): - classinfo = self.stack[-1] - access_match = re.match( - r'^(.*)\b(public|private|protected|signals)(\s+(?:slots\s*)?)?' - r':(?:[^:]|$)', - line) - if access_match: - classinfo.access = access_match.group(2) - - # Check that access keywords are indented +1 space. Skip this - # check if the keywords are not preceded by whitespaces. - indent = access_match.group(1) - if (len(indent) != classinfo.class_indent + 1 and - re.match(r'^\s*$', indent)): - if classinfo.is_struct: - parent = 'struct ' + classinfo.name - else: - parent = 'class ' + classinfo.name - slots = '' - if access_match.group(3): - slots = access_match.group(3) - error(filename, linenum, 'whitespace/indent', 3, - f'{access_match.group(2)}{slots}:' - f' should be indented +1 space inside {parent}') - - # Consume braces or semicolons from what's left of the line - while True: - # Match first brace, semicolon, or closed parenthesis. 
- matched = re.match(r'^[^{;)}]*([{;)}])(.*)$', line) - if not matched: - break - - token = matched.group(1) - if token == '{': - # If namespace or class hasn't seen a opening brace yet, mark - # namespace/class head as complete. Push a new block onto the - # stack otherwise. - if not self.SeenOpenBrace(): - self.stack[-1].seen_open_brace = True - elif re.match(r'^extern\s*"[^"]*"\s*\{', line): - self.stack.append(_ExternCInfo(linenum)) - else: - self.stack.append(_BlockInfo(linenum, True)) - if _MATCH_ASM.match(line): - self.stack[-1].inline_asm = _BLOCK_ASM - - elif token == ';' or token == ')': - # If we haven't seen an opening brace yet, but we already saw - # a semicolon, this is probably a forward declaration. Pop - # the stack for these. - # - # Similarly, if we haven't seen an opening brace yet, but we - # already saw a closing parenthesis, then these are probably - # function arguments with extra "class" or "struct" keywords. - # Also pop these stack for these. - if not self.SeenOpenBrace(): - self.stack.pop() - else: # token == '}' - # Perform end of block checks and pop the stack. - if self.stack: - self.stack[-1].CheckEnd(filename, clean_lines, linenum, error) - self.stack.pop() - line = matched.group(2) + # Since function calls often occur inside if/for/while/switch + # expressions - which have their own, more liberal conventions - we + # first see if we should be looking inside such an expression for a + # function call, to which we can apply more strict standards. + fncall = line # if there's no control flow construct, look at whole line + for pattern in ( + r"\bif\s*\((.*)\)\s*{", + r"\bfor\s*\((.*)\)\s*{", + r"\bwhile\s*\((.*)\)\s*[{;]", + r"\bswitch\s*\((.*)\)\s*{", + ): + match = re.search(pattern, line) + if match: + fncall = match.group(1) # look inside the parens for function calls + break - def InnermostClass(self): - """Get class info on the top of the stack. 
+ # Except in if/for/while/switch, there should never be space + # immediately inside parens (eg "f( 3, 4 )"). We make an exception + # for nested parens ( (a+b) + c ). Likewise, there should never be + # a space before a ( when it's a function argument. I assume it's a + # function argument when the char before the whitespace is legal in + # a function name (alnum + _) and we're not starting a macro. Also ignore + # pointers and references to arrays and functions coz they're too tricky: + # we use a very simple way to recognize these: + # " (something)(maybe-something)" or + # " (something)(maybe-something," or + # " (something)[something]" + # Note that we assume the contents of [] to be short enough that + # they'll never need to wrap. + if ( # Ignore control structures. + not re.search( + r"\b(if|elif|for|while|switch|return|new|delete|catch|sizeof)\b", fncall + ) + and + # Ignore pointers/references to functions. + not re.search(r" \([^)]+\)\([^)]*(\)|,$)", fncall) + and + # Ignore pointers/references to arrays. + not re.search(r" \([^)]+\)\[[^\]]+\]", fncall) + ): + if re.search(r"\w\s*\(\s(?!\s*\\$)", fncall): # a ( used for a fn call + error( + filename, + linenum, + "whitespace/parens", + 4, + "Extra space after ( in function call", + ) + elif re.search(r"\(\s+(?!(\s*\\)|\()", fncall): + error(filename, linenum, "whitespace/parens", 2, "Extra space after (") + if ( + re.search(r"\w\s+\(", fncall) + and not re.search(r"_{0,2}asm_{0,2}\s+_{0,2}volatile_{0,2}\s+\(", fncall) + and not re.search(r"#\s*define|typedef|using\s+\w+\s*=", fncall) + and not re.search(r"\w\s+\((\w+::)*\*\w+\)\(", fncall) + and not re.search(r"\bcase\s+\(", fncall) + ): + # TODO(unknown): Space after an operator function seem to be a common + # error, silence those for now by restricting them to highest verbosity. 
+ if re.search(r"\boperator_*\b", line): + error( + filename, + linenum, + "whitespace/parens", + 0, + "Extra space before ( in function call", + ) + else: + error( + filename, + linenum, + "whitespace/parens", + 4, + "Extra space before ( in function call", + ) + # If the ) is followed only by a newline or a { + newline, assume it's + # part of a control statement (if/while/etc), and don't complain + if re.search(r"[^)]\s+\)\s*[^{\s]", fncall): + # If the closing parenthesis is preceded by only whitespaces, + # try to give a more descriptive error message. + if re.search(r"^\s+\)", fncall): + error( + filename, + linenum, + "whitespace/parens", + 2, + "Closing ) should be moved to the previous line", + ) + else: + error(filename, linenum, "whitespace/parens", 2, "Extra space before )") - Returns: - A _ClassInfo object if we are inside a class, or None otherwise. - """ - for i in range(len(self.stack), 0, -1): - classinfo = self.stack[i - 1] - if isinstance(classinfo, _ClassInfo): - return classinfo - return None - -def CheckForNonStandardConstructs(filename, clean_lines, linenum, - nesting_state, error): - r"""Logs an error if we see certain non-ANSI constructs ignored by gcc-2. - - Complain about several constructs which gcc-2 accepts, but which are - not standard C++. Warning about these in lint is one way to ease the - transition to new compilers. - - put storage class first (e.g. "static const" instead of "const static"). - - "%lld" instead of %qd" in printf-type functions. - - "%1$d" is non-standard in printf-type functions. - - "\%" is an undefined character escape sequence. - - text after #endif is not allowed. - - invalid inner-style forward declaration. - - >? and ?= and )\?=?\s*(\w+|[+-]?\d+)(\.\d*)?', - line): - error(filename, linenum, 'build/deprecated', 3, - '>? and ))?' - # r'\s*const\s*' + type_name + '\s*&\s*\w+\s*;' - error(filename, linenum, 'runtime/member_string_references', 2, - 'const string& members are dangerous. 
It is much better to use ' - 'alternatives, such as pointers or simple constants.') - - # Everything else in this function operates on class declarations. - # Return early if the top of the nesting stack is not a class, or if - # the class head is not completed yet. - classinfo = nesting_state.InnermostClass() - if not classinfo or not classinfo.seen_open_brace: - return - # The class may have been declared with namespace or classname qualifiers. - # The constructor and destructor will not have those qualifiers. - base_classname = classinfo.name.split('::')[-1] +def IsBlankLine(line): + """Returns true if the given line is blank. + + We consider a line to be blank if the line is empty or consists of + only white spaces. - # Look for single-argument constructors that aren't marked explicit. - # Technically a valid construct, but against style. - explicit_constructor_match = re.match( - r'\s+(?:(?:inline|constexpr)\s+)*(explicit\s+)?' - rf'(?:(?:inline|constexpr)\s+)*{re.escape(base_classname)}\s*' - r'\(((?:[^()]|\([^()]*\))*)\)', line) + Args: + line: A line of a string. - if explicit_constructor_match: - is_marked_explicit = explicit_constructor_match.group(1) + Returns: + True, if the given line is blank. + """ + return not line or line.isspace() - if not explicit_constructor_match.group(2): - constructor_args = [] - else: - constructor_args = explicit_constructor_match.group(2).split(',') - - # collapse arguments so that commas in template parameter lists and function - # argument parameter lists don't split arguments in two - i = 0 - while i < len(constructor_args): - constructor_arg = constructor_args[i] - while (constructor_arg.count('<') > constructor_arg.count('>') or - constructor_arg.count('(') > constructor_arg.count(')')): - constructor_arg += ',' + constructor_args[i + 1] - del constructor_args[i + 1] - constructor_args[i] = constructor_arg - i += 1 - - variadic_args = [arg for arg in constructor_args if '&&...' 
in arg] - defaulted_args = [arg for arg in constructor_args if '=' in arg] - noarg_constructor = (not constructor_args or # empty arg list - # 'void' arg specifier - (len(constructor_args) == 1 and - constructor_args[0].strip() == 'void')) - onearg_constructor = ((len(constructor_args) == 1 and # exactly one arg - not noarg_constructor) or - # all but at most one arg defaulted - (len(constructor_args) >= 1 and - not noarg_constructor and - len(defaulted_args) >= len(constructor_args) - 1) or - # variadic arguments with zero or one argument - (len(constructor_args) <= 2 and - len(variadic_args) >= 1)) - initializer_list_constructor = bool( - onearg_constructor and - re.search(r'\bstd\s*::\s*initializer_list\b', constructor_args[0])) - copy_constructor = bool( - onearg_constructor and - re.match(r'((const\s+(volatile\s+)?)?|(volatile\s+(const\s+)?))?' - rf'{re.escape(base_classname)}(\s*<[^>]*>)?(\s+const)?\s*(?:<\w+>\s*)?&', - constructor_args[0].strip()) - ) - if (not is_marked_explicit and - onearg_constructor and - not initializer_list_constructor and - not copy_constructor): - if defaulted_args or variadic_args: - error(filename, linenum, 'runtime/explicit', 4, - 'Constructors callable with one argument ' - 'should be marked explicit.') - else: - error(filename, linenum, 'runtime/explicit', 4, - 'Single-parameter constructors should be marked explicit.') +def CheckForNamespaceIndentation(filename, nesting_state, clean_lines, line, error): + is_namespace_indent_item = len(nesting_state.stack) >= 1 and ( + isinstance(nesting_state.stack[-1], _NamespaceInfo) + or (isinstance(nesting_state.previous_stack_top, _NamespaceInfo)) + ) + if ShouldCheckNamespaceIndentation( + nesting_state, is_namespace_indent_item, clean_lines.elided, line + ): + CheckItemIndentationInNamespace(filename, clean_lines.elided, line, error) -def CheckSpacingForFunctionCall(filename, clean_lines, linenum, error): - """Checks for the correctness of various spacing around function calls. 
- - Args: - filename: The name of the current file. - clean_lines: A CleansedLines instance containing the file. - linenum: The number of the line to check. - error: The function to call with any errors found. - """ - line = clean_lines.elided[linenum] - - # Since function calls often occur inside if/for/while/switch - # expressions - which have their own, more liberal conventions - we - # first see if we should be looking inside such an expression for a - # function call, to which we can apply more strict standards. - fncall = line # if there's no control flow construct, look at whole line - for pattern in (r'\bif\s*\((.*)\)\s*{', - r'\bfor\s*\((.*)\)\s*{', - r'\bwhile\s*\((.*)\)\s*[{;]', - r'\bswitch\s*\((.*)\)\s*{'): - match = re.search(pattern, line) - if match: - fncall = match.group(1) # look inside the parens for function calls - break - - # Except in if/for/while/switch, there should never be space - # immediately inside parens (eg "f( 3, 4 )"). We make an exception - # for nested parens ( (a+b) + c ). Likewise, there should never be - # a space before a ( when it's a function argument. I assume it's a - # function argument when the char before the whitespace is legal in - # a function name (alnum + _) and we're not starting a macro. Also ignore - # pointers and references to arrays and functions coz they're too tricky: - # we use a very simple way to recognize these: - # " (something)(maybe-something)" or - # " (something)(maybe-something," or - # " (something)[something]" - # Note that we assume the contents of [] to be short enough that - # they'll never need to wrap. - if ( # Ignore control structures. - not re.search(r'\b(if|elif|for|while|switch|return|new|delete|catch|sizeof)\b', - fncall) and - # Ignore pointers/references to functions. - not re.search(r' \([^)]+\)\([^)]*(\)|,$)', fncall) and - # Ignore pointers/references to arrays. 
- not re.search(r' \([^)]+\)\[[^\]]+\]', fncall)): - if re.search(r'\w\s*\(\s(?!\s*\\$)', fncall): # a ( used for a fn call - error(filename, linenum, 'whitespace/parens', 4, - 'Extra space after ( in function call') - elif re.search(r'\(\s+(?!(\s*\\)|\()', fncall): - error(filename, linenum, 'whitespace/parens', 2, - 'Extra space after (') - if (re.search(r'\w\s+\(', fncall) and - not re.search(r'_{0,2}asm_{0,2}\s+_{0,2}volatile_{0,2}\s+\(', fncall) and - not re.search(r'#\s*define|typedef|using\s+\w+\s*=', fncall) and - not re.search(r'\w\s+\((\w+::)*\*\w+\)\(', fncall) and - not re.search(r'\bcase\s+\(', fncall)): - # TODO(unknown): Space after an operator function seem to be a common - # error, silence those for now by restricting them to highest verbosity. - if re.search(r'\boperator_*\b', line): - error(filename, linenum, 'whitespace/parens', 0, - 'Extra space before ( in function call') - else: - error(filename, linenum, 'whitespace/parens', 4, - 'Extra space before ( in function call') - # If the ) is followed only by a newline or a { + newline, assume it's - # part of a control statement (if/while/etc), and don't complain - if re.search(r'[^)]\s+\)\s*[^{\s]', fncall): - # If the closing parenthesis is preceded by only whitespaces, - # try to give a more descriptive error message. - if re.search(r'^\s+\)', fncall): - error(filename, linenum, 'whitespace/parens', 2, - 'Closing ) should be moved to the previous line') - else: - error(filename, linenum, 'whitespace/parens', 2, - 'Extra space before )') +def CheckForFunctionLengths(filename, clean_lines, linenum, function_state, error): + """Reports for long function bodies. -def IsBlankLine(line): - """Returns true if the given line is blank. - - We consider a line to be blank if the line is empty or consists of - only white spaces. - - Args: - line: A line of a string. - - Returns: - True, if the given line is blank. 
- """ - return not line or line.isspace() - - -def CheckForNamespaceIndentation(filename, nesting_state, clean_lines, line, - error): - is_namespace_indent_item = ( - len(nesting_state.stack) >= 1 and - (isinstance(nesting_state.stack[-1], _NamespaceInfo) or - (isinstance(nesting_state.previous_stack_top, _NamespaceInfo))) - ) - - if ShouldCheckNamespaceIndentation(nesting_state, is_namespace_indent_item, - clean_lines.elided, line): - CheckItemIndentationInNamespace(filename, clean_lines.elided, - line, error) - - -def CheckForFunctionLengths(filename, clean_lines, linenum, - function_state, error): - """Reports for long function bodies. - - For an overview why this is done, see: - https://google-styleguide.googlecode.com/svn/trunk/cppguide.xml#Write_Short_Functions - - Uses a simplistic algorithm assuming other style guidelines - (especially spacing) are followed. - Only checks unindented functions, so class members are unchecked. - Trivial bodies are unchecked, so constructors with huge initializer lists - may be missed. - Blank/comment lines are not counted so as to avoid encouraging the removal - of vertical space and comments just to get through a lint check. - NOLINT *on the last line of a function* disables this check. - - Args: - filename: The name of the current file. - clean_lines: A CleansedLines instance containing the file. - linenum: The number of the line to check. - function_state: Current function name and lines in body so far. - error: The function to call with any errors found. - """ - lines = clean_lines.lines - line = lines[linenum] - joined_line = '' - - starting_func = False - regexp = r'(\w(\w|::|\*|\&|\s)*)\(' # decls * & space::name( ... - match_result = re.match(regexp, line) - if match_result: - # If the name is all caps and underscores, figure it's a macro and - # ignore it, unless it's TEST or TEST_F. 
- function_name = match_result.group(1).split()[-1] - if function_name == 'TEST' or function_name == 'TEST_F' or ( - not re.match(r'[A-Z_]+$', function_name)): - starting_func = True - - if starting_func: - body_found = False - for start_linenum in range(linenum, clean_lines.NumLines()): - start_line = lines[start_linenum] - joined_line += ' ' + start_line.lstrip() - if re.search(r'(;|})', start_line): # Declarations and trivial functions - body_found = True - break # ... ignore - if re.search(r'{', start_line): - body_found = True - function = re.search(r'((\w|:)*)\(', line).group(1) - if re.match(r'TEST', function): # Handle TEST... macros - parameter_regexp = re.search(r'(\(.*\))', joined_line) - if parameter_regexp: # Ignore bad syntax - function += parameter_regexp.group(1) - else: - function += '()' - function_state.Begin(function) - break - if not body_found: - # No body for the function (or evidence of a non-function) was found. - error(filename, linenum, 'readability/fn_size', 5, - 'Lint failed to find start of function body.') - elif re.match(r'^\}\s*$', line): # function end - function_state.Check(error, filename, linenum) - function_state.End() - elif not re.match(r'^\s*$', line): - function_state.Count() # Count non-blank/non-comment lines. + For an overview why this is done, see: + https://google-styleguide.googlecode.com/svn/trunk/cppguide.xml#Write_Short_Functions + Uses a simplistic algorithm assuming other style guidelines + (especially spacing) are followed. + Only checks unindented functions, so class members are unchecked. + Trivial bodies are unchecked, so constructors with huge initializer lists + may be missed. + Blank/comment lines are not counted so as to avoid encouraging the removal + of vertical space and comments just to get through a lint check. + NOLINT *on the last line of a function* disables this check. -_RE_PATTERN_TODO = re.compile(r'^//(\s*)TODO(\(.+?\))?:?(\s|$)?') + Args: + filename: The name of the current file. 
+ clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + function_state: Current function name and lines in body so far. + error: The function to call with any errors found. + """ + lines = clean_lines.lines + line = lines[linenum] + joined_line = "" + + starting_func = False + regexp = r"(\w(\w|::|\*|\&|\s)*)\(" # decls * & space::name( ... + match_result = re.match(regexp, line) + if match_result: + # If the name is all caps and underscores, figure it's a macro and + # ignore it, unless it's TEST or TEST_F. + function_name = match_result.group(1).split()[-1] + if ( + function_name == "TEST" + or function_name == "TEST_F" + or (not re.match(r"[A-Z_]+$", function_name)) + ): + starting_func = True + + if starting_func: + body_found = False + for start_linenum in range(linenum, clean_lines.NumLines()): + start_line = lines[start_linenum] + joined_line += " " + start_line.lstrip() + if re.search(r"(;|})", start_line): # Declarations and trivial functions + body_found = True + break # ... ignore + if re.search(r"{", start_line): + body_found = True + function = re.search(r"((\w|:)*)\(", line).group(1) + if re.match(r"TEST", function): # Handle TEST... macros + parameter_regexp = re.search(r"(\(.*\))", joined_line) + if parameter_regexp: # Ignore bad syntax + function += parameter_regexp.group(1) + else: + function += "()" + function_state.Begin(function) + break + if not body_found: + # No body for the function (or evidence of a non-function) was found. + error( + filename, + linenum, + "readability/fn_size", + 5, + "Lint failed to find start of function body.", + ) + elif re.match(r"^\}\s*$", line): # function end + function_state.Check(error, filename, linenum) + function_state.End() + elif not re.match(r"^\s*$", line): + function_state.Count() # Count non-blank/non-comment lines. 
+ + +_RE_PATTERN_TODO = re.compile(r"^//(\s*)TODO(\(.+?\))?:?(\s|$)?") def CheckComment(line, filename, linenum, next_line_start, error): - """Checks for common mistakes in comments. - - Args: - line: The line in question. - filename: The name of the current file. - linenum: The number of the line to check. - next_line_start: The first non-whitespace column of the next line. - error: The function to call with any errors found. - """ - commentpos = line.find('//') - if commentpos != -1: - # Check if the // may be in quotes. If so, ignore it - if re.sub(r'\\.', '', line[0:commentpos]).count('"') % 2 == 0: - # Allow one space for new scopes, two spaces otherwise: - if (not (re.match(r'^.*{ *//', line) and next_line_start == commentpos) and - ((commentpos >= 1 and - line[commentpos-1] not in string.whitespace) or - (commentpos >= 2 and - line[commentpos-2] not in string.whitespace))): - error(filename, linenum, 'whitespace/comments', 2, - 'At least two spaces is best between code and comments') - - # Checks for common mistakes in TODO comments. - comment = line[commentpos:] - match = _RE_PATTERN_TODO.match(comment) - if match: - # One whitespace is correct; zero whitespace is handled elsewhere. - leading_whitespace = match.group(1) - if len(leading_whitespace) > 1: - error(filename, linenum, 'whitespace/todo', 2, - 'Too many spaces before TODO') - - username = match.group(2) - if not username: - error(filename, linenum, 'readability/todo', 2, - 'Missing username in TODO; it should look like ' - '"// TODO(my_username): Stuff."') - - middle_whitespace = match.group(3) - # Comparisons made explicit for correctness - # -- pylint: disable=g-explicit-bool-comparison - if middle_whitespace != ' ' and middle_whitespace != '': - error(filename, linenum, 'whitespace/todo', 2, - 'TODO(my_username) should be followed by a space') - - # If the comment contains an alphanumeric character, there - # should be a space somewhere between it and the // unless - # it's a /// or //! 
Doxygen comment. - if (re.match(r'//[^ ]*\w', comment) and - not re.match(r'(///|//\!)(\s+|$)', comment)): - error(filename, linenum, 'whitespace/comments', 4, - 'Should have a space between // and comment') + """Checks for common mistakes in comments. + + Args: + line: The line in question. + filename: The name of the current file. + linenum: The number of the line to check. + next_line_start: The first non-whitespace column of the next line. + error: The function to call with any errors found. + """ + commentpos = line.find("//") + if commentpos != -1: + # Check if the // may be in quotes. If so, ignore it + if re.sub(r"\\.", "", line[0:commentpos]).count('"') % 2 == 0: + # Allow one space for new scopes, two spaces otherwise: + if not (re.match(r"^.*{ *//", line) and next_line_start == commentpos) and ( + (commentpos >= 1 and line[commentpos - 1] not in string.whitespace) + or (commentpos >= 2 and line[commentpos - 2] not in string.whitespace) + ): + error( + filename, + linenum, + "whitespace/comments", + 2, + "At least two spaces is best between code and comments", + ) + + # Checks for common mistakes in TODO comments. + comment = line[commentpos:] + match = _RE_PATTERN_TODO.match(comment) + if match: + # One whitespace is correct; zero whitespace is handled elsewhere. 
+ leading_whitespace = match.group(1) + if len(leading_whitespace) > 1: + error( + filename, + linenum, + "whitespace/todo", + 2, + "Too many spaces before TODO", + ) + + username = match.group(2) + if not username: + error( + filename, + linenum, + "readability/todo", + 2, + "Missing username in TODO; it should look like " + '"// TODO(my_username): Stuff."', + ) + + middle_whitespace = match.group(3) + # Comparisons made explicit for correctness + # -- pylint: disable=g-explicit-bool-comparison + if middle_whitespace != " " and middle_whitespace != "": + error( + filename, + linenum, + "whitespace/todo", + 2, + "TODO(my_username) should be followed by a space", + ) + + # If the comment contains an alphanumeric character, there + # should be a space somewhere between it and the // unless + # it's a /// or //! Doxygen comment. + if re.match(r"//[^ ]*\w", comment) and not re.match( + r"(///|//\!)(\s+|$)", comment + ): + error( + filename, + linenum, + "whitespace/comments", + 4, + "Should have a space between // and comment", + ) def CheckSpacing(filename, clean_lines, linenum, nesting_state, error): - """Checks for the correctness of various spacing issues in the code. - - Things we check for: spaces around operators, spaces after - if/for/while/switch, no spaces around parens in function calls, two - spaces between code and comment, don't start a block with a blank - line, don't end a function with a blank line, don't add a blank line - after public/protected/private, don't have too many blank lines in a row. - - Args: - filename: The name of the current file. - clean_lines: A CleansedLines instance containing the file. - linenum: The number of the line to check. - nesting_state: A NestingState instance which maintains information about - the current stack of nested blocks being parsed. - error: The function to call with any errors found. - """ - - # Don't use "elided" lines here, otherwise we can't check commented lines. 
- # Don't want to use "raw" either, because we don't want to check inside C++11 - # raw strings, - raw = clean_lines.lines_without_raw_strings - line = raw[linenum] - - # Before nixing comments, check if the line is blank for no good - # reason. This includes the first line after a block is opened, and - # blank lines at the end of a function (ie, right before a line like '}' - # - # Skip all the blank line checks if we are immediately inside a - # namespace body. In other words, don't issue blank line warnings - # for this block: - # namespace { - # - # } - # - # A warning about missing end of namespace comments will be issued instead. - # - # Also skip blank line checks for 'extern "C"' blocks, which are formatted - # like namespaces. - if (IsBlankLine(line) and - not nesting_state.InNamespaceBody() and - not nesting_state.InExternC()): - elided = clean_lines.elided - prev_line = elided[linenum - 1] - prevbrace = prev_line.rfind('{') - # TODO(unknown): Don't complain if line before blank line, and line after, - # both start with alnums and are indented the same amount. - # This ignores whitespace at the start of a namespace block - # because those are not usually indented. - if prevbrace != -1 and prev_line[prevbrace:].find('}') == -1: - # OK, we have a blank line at the start of a code block. Before we - # complain, we check if it is an exception to the rule: The previous - # non-empty line has the parameters of a function header that are indented - # 4 spaces (because they did not fit in a 80 column line when placed on - # the same line as the function name). We also check for the case where - # the previous line is indented 6 spaces, which may happen when the - # initializers of a constructor do not fit into a 80 column line. - exception = False - if re.match(r' {6}\w', prev_line): # Initializer list? - # We are looking for the opening column of initializer list, which - # should be indented 4 spaces to cause 6 space indentation afterwards. 
- search_position = linenum-2 - while (search_position >= 0 - and re.match(r' {6}\w', elided[search_position])): - search_position -= 1 - exception = (search_position >= 0 - and elided[search_position][:5] == ' :') - else: - # Search for the function arguments or an initializer list. We use a - # simple heuristic here: If the line is indented 4 spaces; and we have a - # closing paren, without the opening paren, followed by an opening brace - # or colon (for initializer lists) we assume that it is the last line of - # a function header. If we have a colon indented 4 spaces, it is an - # initializer list. - exception = (re.match(r' {4}\w[^\(]*\)\s*(const\s*)?(\{\s*$|:)', - prev_line) - or re.match(r' {4}:', prev_line)) - - if not exception: - error(filename, linenum, 'whitespace/blank_line', 2, - 'Redundant blank line at the start of a code block ' - 'should be deleted.') - # Ignore blank lines at the end of a block in a long if-else - # chain, like this: - # if (condition1) { - # // Something followed by a blank line + """Checks for the correctness of various spacing issues in the code. + + Things we check for: spaces around operators, spaces after + if/for/while/switch, no spaces around parens in function calls, two + spaces between code and comment, don't start a block with a blank + line, don't end a function with a blank line, don't add a blank line + after public/protected/private, don't have too many blank lines in a row. + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + nesting_state: A NestingState instance which maintains information about + the current stack of nested blocks being parsed. + error: The function to call with any errors found. + """ + + # Don't use "elided" lines here, otherwise we can't check commented lines. 
+ # Don't want to use "raw" either, because we don't want to check inside C++11 + # raw strings, + raw = clean_lines.lines_without_raw_strings + line = raw[linenum] + + # Before nixing comments, check if the line is blank for no good + # reason. This includes the first line after a block is opened, and + # blank lines at the end of a function (ie, right before a line like '}' + # + # Skip all the blank line checks if we are immediately inside a + # namespace body. In other words, don't issue blank line warnings + # for this block: + # namespace { # - # } else if (condition2) { - # // Something else # } + # + # A warning about missing end of namespace comments will be issued instead. + # + # Also skip blank line checks for 'extern "C"' blocks, which are formatted + # like namespaces. + if ( + IsBlankLine(line) + and not nesting_state.InNamespaceBody() + and not nesting_state.InExternC() + ): + elided = clean_lines.elided + prev_line = elided[linenum - 1] + prevbrace = prev_line.rfind("{") + # TODO(unknown): Don't complain if line before blank line, and line after, + # both start with alnums and are indented the same amount. + # This ignores whitespace at the start of a namespace block + # because those are not usually indented. + if prevbrace != -1 and prev_line[prevbrace:].find("}") == -1: + # OK, we have a blank line at the start of a code block. Before we + # complain, we check if it is an exception to the rule: The previous + # non-empty line has the parameters of a function header that are indented + # 4 spaces (because they did not fit in a 80 column line when placed on + # the same line as the function name). We also check for the case where + # the previous line is indented 6 spaces, which may happen when the + # initializers of a constructor do not fit into a 80 column line. + exception = False + if re.match(r" {6}\w", prev_line): # Initializer list? 
+ # We are looking for the opening column of initializer list, which + # should be indented 4 spaces to cause 6 space indentation afterwards. + search_position = linenum - 2 + while search_position >= 0 and re.match( + r" {6}\w", elided[search_position] + ): + search_position -= 1 + exception = ( + search_position >= 0 and elided[search_position][:5] == " :" + ) + else: + # Search for the function arguments or an initializer list. We use a + # simple heuristic here: If the line is indented 4 spaces; and we have a + # closing paren, without the opening paren, followed by an opening brace + # or colon (for initializer lists) we assume that it is the last line of + # a function header. If we have a colon indented 4 spaces, it is an + # initializer list. + exception = re.match( + r" {4}\w[^\(]*\)\s*(const\s*)?(\{\s*$|:)", prev_line + ) or re.match(r" {4}:", prev_line) + + if not exception: + error( + filename, + linenum, + "whitespace/blank_line", + 2, + "Redundant blank line at the start of a code block " + "should be deleted.", + ) + # Ignore blank lines at the end of a block in a long if-else + # chain, like this: + # if (condition1) { + # // Something followed by a blank line + # + # } else if (condition2) { + # // Something else + # } + if linenum + 1 < clean_lines.NumLines(): + next_line = raw[linenum + 1] + if ( + next_line + and re.match(r"\s*}", next_line) + and next_line.find("} else ") == -1 + ): + error( + filename, + linenum, + "whitespace/blank_line", + 3, + "Redundant blank line at the end of a code block " + "should be deleted.", + ) + + matched = re.match(r"\s*(public|protected|private):", prev_line) + if matched: + error( + filename, + linenum, + "whitespace/blank_line", + 3, + f'Do not leave a blank line after "{matched.group(1)}:"', + ) + + # Next, check comments + next_line_start = 0 if linenum + 1 < clean_lines.NumLines(): - next_line = raw[linenum + 1] - if (next_line - and re.match(r'\s*}', next_line) - and next_line.find('} else ') == -1): - 
error(filename, linenum, 'whitespace/blank_line', 3, - 'Redundant blank line at the end of a code block ' - 'should be deleted.') - - matched = re.match(r'\s*(public|protected|private):', prev_line) - if matched: - error(filename, linenum, 'whitespace/blank_line', 3, - f'Do not leave a blank line after "{matched.group(1)}:"') - - # Next, check comments - next_line_start = 0 - if linenum + 1 < clean_lines.NumLines(): - next_line = raw[linenum + 1] - next_line_start = len(next_line) - len(next_line.lstrip()) - CheckComment(line, filename, linenum, next_line_start, error) - - # get rid of comments and strings - line = clean_lines.elided[linenum] - - # You shouldn't have spaces before your brackets, except for C++11 attributes - # or maybe after 'delete []', 'return []() {};', or 'auto [abc, ...] = ...;'. - if (re.search(r'\w\s+\[(?!\[)', line) and - not re.search(r'(?:auto&?|delete|return)\s+\[', line)): - error(filename, linenum, 'whitespace/braces', 5, - 'Extra space before [') - - # In range-based for, we wanted spaces before and after the colon, but - # not around "::" tokens that might appear. - if (re.search(r'for *\(.*[^:]:[^: ]', line) or - re.search(r'for *\(.*[^: ]:[^:]', line)): - error(filename, linenum, 'whitespace/forcolon', 2, - 'Missing space around colon in range-based for loop') + next_line = raw[linenum + 1] + next_line_start = len(next_line) - len(next_line.lstrip()) + CheckComment(line, filename, linenum, next_line_start, error) + + # get rid of comments and strings + line = clean_lines.elided[linenum] + + # You shouldn't have spaces before your brackets, except for C++11 attributes + # or maybe after 'delete []', 'return []() {};', or 'auto [abc, ...] = ...;'. 
+ if re.search(r"\w\s+\[(?!\[)", line) and not re.search( + r"(?:auto&?|delete|return)\s+\[", line + ): + error(filename, linenum, "whitespace/braces", 5, "Extra space before [") + + # In range-based for, we wanted spaces before and after the colon, but + # not around "::" tokens that might appear. + if re.search(r"for *\(.*[^:]:[^: ]", line) or re.search( + r"for *\(.*[^: ]:[^:]", line + ): + error( + filename, + linenum, + "whitespace/forcolon", + 2, + "Missing space around colon in range-based for loop", + ) def CheckOperatorSpacing(filename, clean_lines, linenum, error): - """Checks for horizontal spacing around operators. - - Args: - filename: The name of the current file. - clean_lines: A CleansedLines instance containing the file. - linenum: The number of the line to check. - error: The function to call with any errors found. - """ - line = clean_lines.elided[linenum] - - # Don't try to do spacing checks for operator methods. Do this by - # replacing the troublesome characters with something else, - # preserving column position for all other characters. - # - # The replacement is done repeatedly to avoid false positives from - # operators that call operators. - while True: - match = re.match(r'^(.*\boperator\b)(\S+)(\s*\(.*)$', line) + """Checks for horizontal spacing around operators. + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + error: The function to call with any errors found. + """ + line = clean_lines.elided[linenum] + + # Don't try to do spacing checks for operator methods. Do this by + # replacing the troublesome characters with something else, + # preserving column position for all other characters. + # + # The replacement is done repeatedly to avoid false positives from + # operators that call operators. 
+ while True: + match = re.match(r"^(.*\boperator\b)(\S+)(\s*\(.*)$", line) + if match: + line = match.group(1) + ("_" * len(match.group(2))) + match.group(3) + else: + break + + # We allow no-spaces around = within an if: "if ( (a=Foo()) == 0 )". + # Otherwise not. Note we only check for non-spaces on *both* sides; + # sometimes people put non-spaces on one side when aligning ='s among + # many lines (not that this is behavior that I approve of...) + if ( + (re.search(r"[\w.]=", line) or re.search(r"=[\w.]", line)) + and not re.search(r"\b(if|while|for) ", line) + # Operators taken from [lex.operators] in C++11 standard. + and not re.search(r"(>=|<=|==|!=|&=|\^=|\|=|\+=|\*=|\/=|\%=)", line) + and not re.search(r"operator=", line) + ): + error(filename, linenum, "whitespace/operators", 4, "Missing spaces around =") + + # It's ok not to have spaces around binary operators like + - * /, but if + # there's too little whitespace, we get concerned. It's hard to tell, + # though, so we punt on this one for now. TODO. + + # You should always have whitespace around binary operators. + # + # Check <= and >= first to avoid false positives with < and >, then + # check non-include lines for spacing around < and >. + # + # If the operator is followed by a comma, assume it's be used in a + # macro context and don't do any checks. This avoids false + # positives. + # + # Note that && is not included here. This is because there are too + # many false positives due to RValue references. + match = re.search(r"[^<>=!\s](==|!=|<=|>=|\|\|)[^<>=!\s,;\)]", line) if match: - line = match.group(1) + ('_' * len(match.group(2))) + match.group(3) - else: - break - - # We allow no-spaces around = within an if: "if ( (a=Foo()) == 0 )". - # Otherwise not. Note we only check for non-spaces on *both* sides; - # sometimes people put non-spaces on one side when aligning ='s among - # many lines (not that this is behavior that I approve of...) 
- if ((re.search(r'[\w.]=', line) or - re.search(r'=[\w.]', line)) - and not re.search(r'\b(if|while|for) ', line) - # Operators taken from [lex.operators] in C++11 standard. - and not re.search(r'(>=|<=|==|!=|&=|\^=|\|=|\+=|\*=|\/=|\%=)', line) - and not re.search(r'operator=', line)): - error(filename, linenum, 'whitespace/operators', 4, - 'Missing spaces around =') - - # It's ok not to have spaces around binary operators like + - * /, but if - # there's too little whitespace, we get concerned. It's hard to tell, - # though, so we punt on this one for now. TODO. - - # You should always have whitespace around binary operators. - # - # Check <= and >= first to avoid false positives with < and >, then - # check non-include lines for spacing around < and >. - # - # If the operator is followed by a comma, assume it's be used in a - # macro context and don't do any checks. This avoids false - # positives. - # - # Note that && is not included here. This is because there are too - # many false positives due to RValue references. - match = re.search(r'[^<>=!\s](==|!=|<=|>=|\|\|)[^<>=!\s,;\)]', line) - if match: - # TODO: support alternate operators - error(filename, linenum, 'whitespace/operators', 3, - f'Missing spaces around {match.group(1)}') - elif not re.match(r'#.*include', line): - # Look for < that is not surrounded by spaces. This is only - # triggered if both sides are missing spaces, even though - # technically should should flag if at least one side is missing a - # space. This is done to avoid some false positives with shifts. - match = re.match(r'^(.*[^\s<])<[^\s=<,]', line) + # TODO: support alternate operators + error( + filename, + linenum, + "whitespace/operators", + 3, + f"Missing spaces around {match.group(1)}", + ) + elif not re.match(r"#.*include", line): + # Look for < that is not surrounded by spaces. 
This is only + # triggered if both sides are missing spaces, even though + # technically should should flag if at least one side is missing a + # space. This is done to avoid some false positives with shifts. + match = re.match(r"^(.*[^\s<])<[^\s=<,]", line) + if match: + (_, _, end_pos) = CloseExpression(clean_lines, linenum, len(match.group(1))) + if end_pos <= -1: + error( + filename, + linenum, + "whitespace/operators", + 3, + "Missing spaces around <", + ) + + # Look for > that is not surrounded by spaces. Similar to the + # above, we only trigger if both sides are missing spaces to avoid + # false positives with shifts. + match = re.match(r"^(.*[^-\s>])>[^\s=>,]", line) + if match: + (_, _, start_pos) = ReverseCloseExpression( + clean_lines, linenum, len(match.group(1)) + ) + if start_pos <= -1: + error( + filename, + linenum, + "whitespace/operators", + 3, + "Missing spaces around >", + ) + + # We allow no-spaces around << when used like this: 10<<20, but + # not otherwise (particularly, not when used as streams) + # + # We also allow operators following an opening parenthesis, since + # those tend to be macros that deal with operators. + match = re.search( + r"(operator|[^\s(<])(?:L|UL|LL|ULL|l|ul|ll|ull)?<<([^\s,=<])", line + ) + if ( + match + and not (match.group(1).isdigit() and match.group(2).isdigit()) + and not (match.group(1) == "operator" and match.group(2) == ";") + ): + error(filename, linenum, "whitespace/operators", 3, "Missing spaces around <<") + + # We allow no-spaces around >> for almost anything. This is because + # C++11 allows ">>" to close nested templates, which accounts for + # most cases when ">>" is not followed by a space. 
+ # + # We still warn on ">>" followed by alpha character, because that is + # likely due to ">>" being used for right shifts, e.g.: + # value >> alpha + # + # When ">>" is used to close templates, the alphanumeric letter that + # follows would be part of an identifier, and there should still be + # a space separating the template type and the identifier. + # type> alpha + match = re.search(r">>[a-zA-Z_]", line) if match: - (_, _, end_pos) = CloseExpression( - clean_lines, linenum, len(match.group(1))) - if end_pos <= -1: - error(filename, linenum, 'whitespace/operators', 3, - 'Missing spaces around <') - - # Look for > that is not surrounded by spaces. Similar to the - # above, we only trigger if both sides are missing spaces to avoid - # false positives with shifts. - match = re.match(r'^(.*[^-\s>])>[^\s=>,]', line) + error(filename, linenum, "whitespace/operators", 3, "Missing spaces around >>") + + # There shouldn't be space around unary operators + match = re.search(r"(!\s|~\s|[\s]--[\s;]|[\s]\+\+[\s;])", line) if match: - (_, _, start_pos) = ReverseCloseExpression( - clean_lines, linenum, len(match.group(1))) - if start_pos <= -1: - error(filename, linenum, 'whitespace/operators', 3, - 'Missing spaces around >') - - # We allow no-spaces around << when used like this: 10<<20, but - # not otherwise (particularly, not when used as streams) - # - # We also allow operators following an opening parenthesis, since - # those tend to be macros that deal with operators. - match = re.search(r'(operator|[^\s(<])(?:L|UL|LL|ULL|l|ul|ll|ull)?<<([^\s,=<])', line) - if (match and not (match.group(1).isdigit() and match.group(2).isdigit()) and - not (match.group(1) == 'operator' and match.group(2) == ';')): - error(filename, linenum, 'whitespace/operators', 3, - 'Missing spaces around <<') - - # We allow no-spaces around >> for almost anything. 
This is because - # C++11 allows ">>" to close nested templates, which accounts for - # most cases when ">>" is not followed by a space. - # - # We still warn on ">>" followed by alpha character, because that is - # likely due to ">>" being used for right shifts, e.g.: - # value >> alpha - # - # When ">>" is used to close templates, the alphanumeric letter that - # follows would be part of an identifier, and there should still be - # a space separating the template type and the identifier. - # type> alpha - match = re.search(r'>>[a-zA-Z_]', line) - if match: - error(filename, linenum, 'whitespace/operators', 3, - 'Missing spaces around >>') - - # There shouldn't be space around unary operators - match = re.search(r'(!\s|~\s|[\s]--[\s;]|[\s]\+\+[\s;])', line) - if match: - error(filename, linenum, 'whitespace/operators', 4, - f'Extra space for operator {match.group(1)}') + error( + filename, + linenum, + "whitespace/operators", + 4, + f"Extra space for operator {match.group(1)}", + ) def CheckParenthesisSpacing(filename, clean_lines, linenum, error): - """Checks for horizontal spacing around parentheses. - - Args: - filename: The name of the current file. - clean_lines: A CleansedLines instance containing the file. - linenum: The number of the line to check. - error: The function to call with any errors found. - """ - line = clean_lines.elided[linenum] - - # No spaces after an if, while, switch, or for - match = re.search(r' (if\(|for\(|while\(|switch\()', line) - if match: - error(filename, linenum, 'whitespace/parens', 5, - f'Missing space before ( in {match.group(1)}') - - # For if/for/while/switch, the left and right parens should be - # consistent about how many spaces are inside the parens, and - # there should either be zero or one spaces inside the parens. - # We don't want: "if ( foo)" or "if ( foo )". - # Exception: "for ( ; foo; bar)" and "for (foo; bar; )" are allowed. 
- match = re.search(r'\b(if|for|while|switch)\s*' - r'\(([ ]*)(.).*[^ ]+([ ]*)\)\s*{\s*$', - line) - if match: - if len(match.group(2)) != len(match.group(4)): - if not (match.group(3) == ';' and - len(match.group(2)) == 1 + len(match.group(4)) or - not match.group(2) and re.search(r'\bfor\s*\(.*; \)', line)): - error(filename, linenum, 'whitespace/parens', 5, - f'Mismatching spaces inside () in {match.group(1)}') - if len(match.group(2)) not in [0, 1]: - error(filename, linenum, 'whitespace/parens', 5, - f'Should have zero or one spaces inside ( and ) in {match.group(1)}') - - -def CheckCommaSpacing(filename, clean_lines, linenum, error): - """Checks for horizontal spacing near commas and semicolons. - - Args: - filename: The name of the current file. - clean_lines: A CleansedLines instance containing the file. - linenum: The number of the line to check. - error: The function to call with any errors found. - """ - raw = clean_lines.lines_without_raw_strings - line = clean_lines.elided[linenum] - - # You should always have a space after a comma (either as fn arg or operator) - # - # This does not apply when the non-space character following the - # comma is another comma, since the only time when that happens is - # for empty macro arguments. - # - # We run this check in two passes: first pass on elided lines to - # verify that lines contain missing whitespaces, second pass on raw - # lines to confirm that those missing whitespaces are not due to - # elided comments. 
- match = re.search(r',[^,\s]', re.sub(r'\b__VA_OPT__\s*\(,\)', '', - re.sub(r'\boperator\s*,\s*\(', 'F(', line))) - if (match and re.search(r',[^,\s]', raw[linenum])): - error(filename, linenum, 'whitespace/comma', 3, - 'Missing space after ,') - - # You should always have a space after a semicolon - # except for few corner cases - # TODO(unknown): clarify if 'if (1) { return 1;}' is requires one more - # space after ; - if re.search(r';[^\s};\\)/]', line): - error(filename, linenum, 'whitespace/semicolon', 3, - 'Missing space after ;') + """Checks for horizontal spacing around parentheses. + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + error: The function to call with any errors found. + """ + line = clean_lines.elided[linenum] -def _IsType(clean_lines, nesting_state, expr): - """Check if expression looks like a type name, returns true if so. - - Args: - clean_lines: A CleansedLines instance containing the file. - nesting_state: A NestingState instance which maintains information about - the current stack of nested blocks being parsed. - expr: The expression to check. - Returns: - True, if token looks like a type. - """ - # Keep only the last token in the expression - last_word = re.match(r'^.*(\b\S+)$', expr) - if last_word: - token = last_word.group(1) - else: - token = expr - - # Match native types and stdint types - if _TYPES.match(token): - return True + # No spaces after an if, while, switch, or for + match = re.search(r" (if\(|for\(|while\(|switch\()", line) + if match: + error( + filename, + linenum, + "whitespace/parens", + 5, + f"Missing space before ( in {match.group(1)}", + ) + + # For if/for/while/switch, the left and right parens should be + # consistent about how many spaces are inside the parens, and + # there should either be zero or one spaces inside the parens. + # We don't want: "if ( foo)" or "if ( foo )". 
+ # Exception: "for ( ; foo; bar)" and "for (foo; bar; )" are allowed. + match = re.search( + r"\b(if|for|while|switch)\s*" r"\(([ ]*)(.).*[^ ]+([ ]*)\)\s*{\s*$", line + ) + if match: + if len(match.group(2)) != len(match.group(4)): + if not ( + match.group(3) == ";" + and len(match.group(2)) == 1 + len(match.group(4)) + or not match.group(2) + and re.search(r"\bfor\s*\(.*; \)", line) + ): + error( + filename, + linenum, + "whitespace/parens", + 5, + f"Mismatching spaces inside () in {match.group(1)}", + ) + if len(match.group(2)) not in [0, 1]: + error( + filename, + linenum, + "whitespace/parens", + 5, + f"Should have zero or one spaces inside ( and ) in {match.group(1)}", + ) - # Try a bit harder to match templated types. Walk up the nesting - # stack until we find something that resembles a typename - # declaration for what we are looking for. - typename_pattern = (r'\b(?:typename|class|struct)\s+' + re.escape(token) + - r'\b') - block_index = len(nesting_state.stack) - 1 - while block_index >= 0: - if isinstance(nesting_state.stack[block_index], _NamespaceInfo): - return False - - # Found where the opening brace is. We want to scan from this - # line up to the beginning of the function, minus a few lines. - # template - # class C - # : public ... 
{ // start scanning here - last_line = nesting_state.stack[block_index].starting_linenum - - next_block_start = 0 - if block_index > 0: - next_block_start = nesting_state.stack[block_index - 1].starting_linenum - first_line = last_line - while first_line >= next_block_start: - if clean_lines.elided[first_line].find('template') >= 0: - break - first_line -= 1 - if first_line < next_block_start: - # Didn't find any "template" keyword before reaching the next block, - # there are probably no template things to check for this block - block_index -= 1 - continue - - # Look for typename in the specified range - for i in range(first_line, last_line + 1, 1): - if re.search(typename_pattern, clean_lines.elided[i]): - return True - block_index -= 1 - return False +def CheckCommaSpacing(filename, clean_lines, linenum, error): + """Checks for horizontal spacing near commas and semicolons. + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + error: The function to call with any errors found. + """ + raw = clean_lines.lines_without_raw_strings + line = clean_lines.elided[linenum] -def CheckBracesSpacing(filename, clean_lines, linenum, nesting_state, error): - """Checks for horizontal spacing near commas. - - Args: - filename: The name of the current file. - clean_lines: A CleansedLines instance containing the file. - linenum: The number of the line to check. - nesting_state: A NestingState instance which maintains information about - the current stack of nested blocks being parsed. - error: The function to call with any errors found. - """ - line = clean_lines.elided[linenum] - - # Except after an opening paren, or after another opening brace (in case of - # an initializer list, for instance), you should have spaces before your - # braces when they are delimiting blocks, classes, namespaces etc. 
- # And since you should never have braces at the beginning of a line, - # this is an easy test. Except that braces used for initialization don't - # follow the same rule; we often don't want spaces before those. - match = re.match(r'^(.*[^ ({>]){', line) - - if match: - # Try a bit harder to check for brace initialization. This - # happens in one of the following forms: - # Constructor() : initializer_list_{} { ... } - # Constructor{}.MemberFunction() - # Type variable{}; - # FunctionCall(type{}, ...); - # LastArgument(..., type{}); - # LOG(INFO) << type{} << " ..."; - # map_of_type[{...}] = ...; - # ternary = expr ? new type{} : nullptr; - # OuterTemplate{}> - # - # We check for the character following the closing brace, and - # silence the warning if it's one of those listed above, i.e. - # "{.;,)<>]:". + # You should always have a space after a comma (either as fn arg or operator) # - # To account for nested initializer list, we allow any number of - # closing braces up to "{;,)<". We can't simply silence the - # warning on first sight of closing brace, because that would - # cause false negatives for things that are not initializer lists. - # Silence this: But not this: - # Outer{ if (...) { - # Inner{...} if (...){ // Missing space before { - # }; } + # This does not apply when the non-space character following the + # comma is another comma, since the only time when that happens is + # for empty macro arguments. # - # There is a false negative with this approach if people inserted - # spurious semicolons, e.g. "if (cond){};", but we will catch the - # spurious semicolon with a separate check. 
- leading_text = match.group(1) - (endline, endlinenum, endpos) = CloseExpression( - clean_lines, linenum, len(match.group(1))) - trailing_text = '' - if endpos > -1: - trailing_text = endline[endpos:] - for offset in range(endlinenum + 1, - min(endlinenum + 3, clean_lines.NumLines() - 1)): - trailing_text += clean_lines.elided[offset] - # We also suppress warnings for `uint64_t{expression}` etc., as the style - # guide recommends brace initialization for integral types to avoid - # overflow/truncation. - if (not re.match(r'^[\s}]*[{.;,)<>\]:]', trailing_text) - and not _IsType(clean_lines, nesting_state, leading_text)): - error(filename, linenum, 'whitespace/braces', 5, - 'Missing space before {') - - # Make sure '} else {' has spaces. - if re.search(r'}else', line): - error(filename, linenum, 'whitespace/braces', 5, - 'Missing space before else') - - # You shouldn't have a space before a semicolon at the end of the line. - # There's a special case for "for" since the style guide allows space before - # the semicolon there. - if re.search(r':\s*;\s*$', line): - error(filename, linenum, 'whitespace/semicolon', 5, - 'Semicolon defining empty statement. Use {} instead.') - elif re.search(r'^\s*;\s*$', line): - error(filename, linenum, 'whitespace/semicolon', 5, - 'Line contains only semicolon. If this should be an empty statement, ' - 'use {} instead.') - elif (re.search(r'\s+;\s*$', line) and - not re.search(r'\bfor\b', line)): - error(filename, linenum, 'whitespace/semicolon', 5, - 'Extra space before last semicolon. If this should be an empty ' - 'statement, use {} instead.') + # We run this check in two passes: first pass on elided lines to + # verify that lines contain missing whitespaces, second pass on raw + # lines to confirm that those missing whitespaces are not due to + # elided comments. 
+ match = re.search( + r",[^,\s]", + re.sub(r"\b__VA_OPT__\s*\(,\)", "", re.sub(r"\boperator\s*,\s*\(", "F(", line)), + ) + if match and re.search(r",[^,\s]", raw[linenum]): + error(filename, linenum, "whitespace/comma", 3, "Missing space after ,") + # You should always have a space after a semicolon + # except for few corner cases + # TODO(unknown): clarify if 'if (1) { return 1;}' is requires one more + # space after ; + if re.search(r";[^\s};\\)/]", line): + error(filename, linenum, "whitespace/semicolon", 3, "Missing space after ;") -def IsDecltype(clean_lines, linenum, column): - """Check if the token ending on (linenum, column) is decltype(). - - Args: - clean_lines: A CleansedLines instance containing the file. - linenum: the number of the line to check. - column: end column of the token to check. - Returns: - True if this token is decltype() expression, False otherwise. - """ - (text, _, start_col) = ReverseCloseExpression(clean_lines, linenum, column) - if start_col < 0: - return False - if re.search(r'\bdecltype\s*$', text[0:start_col]): - return True - return False -def CheckSectionSpacing(filename, clean_lines, class_info, linenum, error): - """Checks for additional blank line issues related to sections. - - Currently the only thing checked here is blank line before protected/private. - - Args: - filename: The name of the current file. - clean_lines: A CleansedLines instance containing the file. - class_info: A _ClassInfo objects. - linenum: The number of the line to check. - error: The function to call with any errors found. - """ - # Skip checks if the class is small, where small means 25 lines or less. - # 25 lines seems like a good cutoff since that's the usual height of - # terminals, and any class that can't fit in one screen can't really - # be considered "small". - # - # Also skip checks if we are on the first line. This accounts for - # classes that look like - # class Foo { public: ... 
}; - # - # If we didn't find the end of the class, last_line would be zero, - # and the check will be skipped by the first condition. - if (class_info.last_line - class_info.starting_linenum <= 24 or - linenum <= class_info.starting_linenum): - return +def _IsType(clean_lines, nesting_state, expr): + """Check if expression looks like a type name, returns true if so. + + Args: + clean_lines: A CleansedLines instance containing the file. + nesting_state: A NestingState instance which maintains information about + the current stack of nested blocks being parsed. + expr: The expression to check. + Returns: + True, if token looks like a type. + """ + # Keep only the last token in the expression + last_word = re.match(r"^.*(\b\S+)$", expr) + if last_word: + token = last_word.group(1) + else: + token = expr + + # Match native types and stdint types + if _TYPES.match(token): + return True + + # Try a bit harder to match templated types. Walk up the nesting + # stack until we find something that resembles a typename + # declaration for what we are looking for. + typename_pattern = r"\b(?:typename|class|struct)\s+" + re.escape(token) + r"\b" + block_index = len(nesting_state.stack) - 1 + while block_index >= 0: + if isinstance(nesting_state.stack[block_index], _NamespaceInfo): + return False + + # Found where the opening brace is. We want to scan from this + # line up to the beginning of the function, minus a few lines. + # template + # class C + # : public ... 
{ // start scanning here + last_line = nesting_state.stack[block_index].starting_linenum + + next_block_start = 0 + if block_index > 0: + next_block_start = nesting_state.stack[block_index - 1].starting_linenum + first_line = last_line + while first_line >= next_block_start: + if clean_lines.elided[first_line].find("template") >= 0: + break + first_line -= 1 + if first_line < next_block_start: + # Didn't find any "template" keyword before reaching the next block, + # there are probably no template things to check for this block + block_index -= 1 + continue + + # Look for typename in the specified range + for i in range(first_line, last_line + 1, 1): + if re.search(typename_pattern, clean_lines.elided[i]): + return True + block_index -= 1 + + return False + + +def CheckBracesSpacing(filename, clean_lines, linenum, nesting_state, error): + """Checks for horizontal spacing near commas. + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + nesting_state: A NestingState instance which maintains information about + the current stack of nested blocks being parsed. + error: The function to call with any errors found. + """ + line = clean_lines.elided[linenum] - matched = re.match(r'\s*(public|protected|private):', clean_lines.lines[linenum]) - if matched: - # Issue warning if the line before public/protected/private was - # not a blank line, but don't do this if the previous line contains - # "class" or "struct". This can happen two ways: - # - We are at the beginning of the class. - # - We are forward-declaring an inner class that is semantically - # private, but needed to be public for implementation reasons. - # Also ignores cases where the previous line ends with a backslash as can be - # common when defining classes in C macros. 
- prev_line = clean_lines.lines[linenum - 1] - if (not IsBlankLine(prev_line) and - not re.search(r'\b(class|struct)\b', prev_line) and - not re.search(r'\\$', prev_line)): - # Try a bit harder to find the beginning of the class. This is to - # account for multi-line base-specifier lists, e.g.: - # class Derived - # : public Base { - end_class_head = class_info.starting_linenum - for i in range(class_info.starting_linenum, linenum): - if re.search(r'\{\s*$', clean_lines.lines[i]): - end_class_head = i - break - if end_class_head < linenum - 1: - error(filename, linenum, 'whitespace/blank_line', 3, - f'"{matched.group(1)}:" should be preceded by a blank line') + # Except after an opening paren, or after another opening brace (in case of + # an initializer list, for instance), you should have spaces before your + # braces when they are delimiting blocks, classes, namespaces etc. + # And since you should never have braces at the beginning of a line, + # this is an easy test. Except that braces used for initialization don't + # follow the same rule; we often don't want spaces before those. + match = re.match(r"^(.*[^ ({>]){", line) + + if match: + # Try a bit harder to check for brace initialization. This + # happens in one of the following forms: + # Constructor() : initializer_list_{} { ... } + # Constructor{}.MemberFunction() + # Type variable{}; + # FunctionCall(type{}, ...); + # LastArgument(..., type{}); + # LOG(INFO) << type{} << " ..."; + # map_of_type[{...}] = ...; + # ternary = expr ? new type{} : nullptr; + # OuterTemplate{}> + # + # We check for the character following the closing brace, and + # silence the warning if it's one of those listed above, i.e. + # "{.;,)<>]:". + # + # To account for nested initializer list, we allow any number of + # closing braces up to "{;,)<". We can't simply silence the + # warning on first sight of closing brace, because that would + # cause false negatives for things that are not initializer lists. 
+ # Silence this: But not this: + # Outer{ if (...) { + # Inner{...} if (...){ // Missing space before { + # }; } + # + # There is a false negative with this approach if people inserted + # spurious semicolons, e.g. "if (cond){};", but we will catch the + # spurious semicolon with a separate check. + leading_text = match.group(1) + (endline, endlinenum, endpos) = CloseExpression( + clean_lines, linenum, len(match.group(1)) + ) + trailing_text = "" + if endpos > -1: + trailing_text = endline[endpos:] + for offset in range( + endlinenum + 1, min(endlinenum + 3, clean_lines.NumLines() - 1) + ): + trailing_text += clean_lines.elided[offset] + # We also suppress warnings for `uint64_t{expression}` etc., as the style + # guide recommends brace initialization for integral types to avoid + # overflow/truncation. + if not re.match(r"^[\s}]*[{.;,)<>\]:]", trailing_text) and not _IsType( + clean_lines, nesting_state, leading_text + ): + error(filename, linenum, "whitespace/braces", 5, "Missing space before {") + + # Make sure '} else {' has spaces. + if re.search(r"}else", line): + error(filename, linenum, "whitespace/braces", 5, "Missing space before else") + + # You shouldn't have a space before a semicolon at the end of the line. + # There's a special case for "for" since the style guide allows space before + # the semicolon there. + if re.search(r":\s*;\s*$", line): + error( + filename, + linenum, + "whitespace/semicolon", + 5, + "Semicolon defining empty statement. Use {} instead.", + ) + elif re.search(r"^\s*;\s*$", line): + error( + filename, + linenum, + "whitespace/semicolon", + 5, + "Line contains only semicolon. If this should be an empty statement, " + "use {} instead.", + ) + elif re.search(r"\s+;\s*$", line) and not re.search(r"\bfor\b", line): + error( + filename, + linenum, + "whitespace/semicolon", + 5, + "Extra space before last semicolon. 
If this should be an empty " + "statement, use {} instead.", + ) + + +def IsDecltype(clean_lines, linenum, column): + """Check if the token ending on (linenum, column) is decltype(). + + Args: + clean_lines: A CleansedLines instance containing the file. + linenum: the number of the line to check. + column: end column of the token to check. + Returns: + True if this token is decltype() expression, False otherwise. + """ + (text, _, start_col) = ReverseCloseExpression(clean_lines, linenum, column) + if start_col < 0: + return False + if re.search(r"\bdecltype\s*$", text[0:start_col]): + return True + return False + + +def CheckSectionSpacing(filename, clean_lines, class_info, linenum, error): + """Checks for additional blank line issues related to sections. + + Currently the only thing checked here is blank line before protected/private. + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + class_info: A _ClassInfo objects. + linenum: The number of the line to check. + error: The function to call with any errors found. + """ + # Skip checks if the class is small, where small means 25 lines or less. + # 25 lines seems like a good cutoff since that's the usual height of + # terminals, and any class that can't fit in one screen can't really + # be considered "small". + # + # Also skip checks if we are on the first line. This accounts for + # classes that look like + # class Foo { public: ... }; + # + # If we didn't find the end of the class, last_line would be zero, + # and the check will be skipped by the first condition. + if ( + class_info.last_line - class_info.starting_linenum <= 24 + or linenum <= class_info.starting_linenum + ): + return + + matched = re.match(r"\s*(public|protected|private):", clean_lines.lines[linenum]) + if matched: + # Issue warning if the line before public/protected/private was + # not a blank line, but don't do this if the previous line contains + # "class" or "struct". 
This can happen two ways: + # - We are at the beginning of the class. + # - We are forward-declaring an inner class that is semantically + # private, but needed to be public for implementation reasons. + # Also ignores cases where the previous line ends with a backslash as can be + # common when defining classes in C macros. + prev_line = clean_lines.lines[linenum - 1] + if ( + not IsBlankLine(prev_line) + and not re.search(r"\b(class|struct)\b", prev_line) + and not re.search(r"\\$", prev_line) + ): + # Try a bit harder to find the beginning of the class. This is to + # account for multi-line base-specifier lists, e.g.: + # class Derived + # : public Base { + end_class_head = class_info.starting_linenum + for i in range(class_info.starting_linenum, linenum): + if re.search(r"\{\s*$", clean_lines.lines[i]): + end_class_head = i + break + if end_class_head < linenum - 1: + error( + filename, + linenum, + "whitespace/blank_line", + 3, + f'"{matched.group(1)}:" should be preceded by a blank line', + ) def GetPreviousNonBlankLine(clean_lines, linenum): - """Return the most recent non-blank line and its line number. + """Return the most recent non-blank line and its line number. - Args: - clean_lines: A CleansedLines instance containing the file contents. - linenum: The number of the line to check. + Args: + clean_lines: A CleansedLines instance containing the file contents. + linenum: The number of the line to check. - Returns: - A tuple with two elements. The first element is the contents of the last - non-blank line before the current line, or the empty string if this is the - first non-blank line. The second is the line number of that line, or -1 - if this is the first non-blank line. - """ + Returns: + A tuple with two elements. The first element is the contents of the last + non-blank line before the current line, or the empty string if this is the + first non-blank line. The second is the line number of that line, or -1 + if this is the first non-blank line. 
+ """ - prevlinenum = linenum - 1 - while prevlinenum >= 0: - prevline = clean_lines.elided[prevlinenum] - if not IsBlankLine(prevline): # if not a blank line... - return (prevline, prevlinenum) - prevlinenum -= 1 - return ('', -1) + prevlinenum = linenum - 1 + while prevlinenum >= 0: + prevline = clean_lines.elided[prevlinenum] + if not IsBlankLine(prevline): # if not a blank line... + return (prevline, prevlinenum) + prevlinenum -= 1 + return ("", -1) def CheckBraces(filename, clean_lines, linenum, error): - """Looks for misplaced braces (e.g. at the end of line). - - Args: - filename: The name of the current file. - clean_lines: A CleansedLines instance containing the file. - linenum: The number of the line to check. - error: The function to call with any errors found. - """ - - line = clean_lines.elided[linenum] # get rid of comments and strings - - if re.match(r'\s*{\s*$', line): - # We allow an open brace to start a line in the case where someone is using - # braces in a block to explicitly create a new scope, which is commonly used - # to control the lifetime of stack-allocated variables. Braces are also - # used for brace initializers inside function calls. We don't detect this - # perfectly: we just don't complain if the last non-whitespace character on - # the previous non-blank line is ',', ';', ':', '(', '{', or '}', or if the - # previous line starts a preprocessor block. We also allow a brace on the - # following line if it is part of an array initialization and would not fit - # within the 80 character limit of the preceding line. - prevline = GetPreviousNonBlankLine(clean_lines, linenum)[0] - if (not re.search(r'[,;:}{(]\s*$', prevline) and - not re.match(r'\s*#', prevline) and - not (GetLineWidth(prevline) > _line_length - 2 and '[]' in prevline)): - error(filename, linenum, 'whitespace/braces', 4, - '{ should almost always be at the end of the previous line') - - # An else clause should be on the same line as the preceding closing brace. 
- if last_wrong := re.match(r'\s*else\b\s*(?:if\b|\{|$)', line): - prevline = GetPreviousNonBlankLine(clean_lines, linenum)[0] - if re.match(r'\s*}\s*$', prevline): - error(filename, linenum, 'whitespace/newline', 4, - 'An else should appear on the same line as the preceding }') - else: - last_wrong = False - - # If braces come on one side of an else, they should be on both. - # However, we have to worry about "else if" that spans multiple lines! - if re.search(r'else if\s*\(', line): # could be multi-line if - brace_on_left = bool(re.search(r'}\s*else if\s*\(', line)) - # find the ( after the if - pos = line.find('else if') - pos = line.find('(', pos) - if pos > 0: - (endline, _, endpos) = CloseExpression(clean_lines, linenum, pos) - brace_on_right = endline[endpos:].find('{') != -1 - if brace_on_left != brace_on_right: # must be brace after if - error(filename, linenum, 'readability/braces', 5, - 'If an else has a brace on one side, it should have it on both') - # Prevent detection if statement has { and we detected an improper newline after } - elif re.search(r'}\s*else[^{]*$', line) or (re.match(r'[^}]*else\s*{', line) and not last_wrong): - error(filename, linenum, 'readability/braces', 5, - 'If an else has a brace on one side, it should have it on both') - - # No control clauses with braces should have its contents on the same line - # Exclude } which will be covered by empty-block detect - # Exclude ; which may be used by while in a do-while - if keyword := re.search( - r'\b(else if|if|while|for|switch)' # These have parens - r'\s*\(.*\)\s*(?:\[\[(?:un)?likely\]\]\s*)?{\s*[^\s\\};]', line): - error(filename, linenum, 'whitespace/newline', 5, - f'Controlled statements inside brackets of {keyword.group(1)} clause' - ' should be on a separate line') - elif keyword := re.search( - r'\b(else|do|try)' # These don't have parens - r'\s*(?:\[\[(?:un)?likely\]\]\s*)?{\s*[^\s\\}]', line): - error(filename, linenum, 'whitespace/newline', 5, - f'Controlled statements 
inside brackets of {keyword.group(1)} clause' - ' should be on a separate line') - - # TODO: Err on if...else and do...while statements without braces; - # style guide has changed since the below comment was written - - # Check single-line if/else bodies. The style guide says 'curly braces are not - # required for single-line statements'. We additionally allow multi-line, - # single statements, but we reject anything with more than one semicolon in - # it. This means that the first semicolon after the if should be at the end of - # its line, and the line after that should have an indent level equal to or - # lower than the if. We also check for ambiguous if/else nesting without - # braces. - if_else_match = re.search(r'\b(if\s*(|constexpr)\s*\(|else\b)', line) - if if_else_match and not re.match(r'\s*#', line): - if_indent = GetIndentLevel(line) - endline, endlinenum, endpos = line, linenum, if_else_match.end() - if_match = re.search(r'\bif\s*(|constexpr)\s*\(', line) - if if_match: - # This could be a multiline if condition, so find the end first. - pos = if_match.end() - 1 - (endline, endlinenum, endpos) = CloseExpression(clean_lines, linenum, pos) - # Check for an opening brace, either directly after the if or on the next - # line. If found, this isn't a single-statement conditional. - if (not re.match(r'\s*(?:\[\[(?:un)?likely\]\]\s*)?{', endline[endpos:]) - and not (re.match(r'\s*$', endline[endpos:]) - and endlinenum < (len(clean_lines.elided) - 1) - and re.match(r'\s*{', clean_lines.elided[endlinenum + 1]))): - while (endlinenum < len(clean_lines.elided) - and ';' not in clean_lines.elided[endlinenum][endpos:]): - endlinenum += 1 - endpos = 0 - if endlinenum < len(clean_lines.elided): - endline = clean_lines.elided[endlinenum] - # We allow a mix of whitespace and closing braces (e.g. 
for one-liner - # methods) and a single \ after the semicolon (for macros) - endpos = endline.find(';') - if not re.match(r';[\s}]*(\\?)$', endline[endpos:]): - # Semicolon isn't the last character, there's something trailing. - # Output a warning if the semicolon is not contained inside - # a lambda expression. - if not re.match(r'^[^{};]*\[[^\[\]]*\][^{}]*\{[^{}]*\}\s*\)*[;,]\s*$', - endline): - error(filename, linenum, 'readability/braces', 4, - 'If/else bodies with multiple statements require braces') - elif endlinenum < len(clean_lines.elided) - 1: - # Make sure the next line is dedented - next_line = clean_lines.elided[endlinenum + 1] - next_indent = GetIndentLevel(next_line) - # With ambiguous nested if statements, this will error out on the - # if that *doesn't* match the else, regardless of whether it's the - # inner one or outer one. - if (if_match and re.match(r'\s*else\b', next_line) - and next_indent != if_indent): - error(filename, linenum, 'readability/braces', 4, - 'Else clause should be indented at the same level as if. ' - 'Ambiguous nested if/else chains require braces.') - elif next_indent > if_indent: - error(filename, linenum, 'readability/braces', 4, - 'If/else bodies with multiple statements require braces') + """Looks for misplaced braces (e.g. at the end of line). + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + error: The function to call with any errors found. + """ + + line = clean_lines.elided[linenum] # get rid of comments and strings + + if re.match(r"\s*{\s*$", line): + # We allow an open brace to start a line in the case where someone is using + # braces in a block to explicitly create a new scope, which is commonly used + # to control the lifetime of stack-allocated variables. Braces are also + # used for brace initializers inside function calls. 
We don't detect this + # perfectly: we just don't complain if the last non-whitespace character on + # the previous non-blank line is ',', ';', ':', '(', '{', or '}', or if the + # previous line starts a preprocessor block. We also allow a brace on the + # following line if it is part of an array initialization and would not fit + # within the 80 character limit of the preceding line. + prevline = GetPreviousNonBlankLine(clean_lines, linenum)[0] + if ( + not re.search(r"[,;:}{(]\s*$", prevline) + and not re.match(r"\s*#", prevline) + and not (GetLineWidth(prevline) > _line_length - 2 and "[]" in prevline) + ): + error( + filename, + linenum, + "whitespace/braces", + 4, + "{ should almost always be at the end of the previous line", + ) + + # An else clause should be on the same line as the preceding closing brace. + if last_wrong := re.match(r"\s*else\b\s*(?:if\b|\{|$)", line): + prevline = GetPreviousNonBlankLine(clean_lines, linenum)[0] + if re.match(r"\s*}\s*$", prevline): + error( + filename, + linenum, + "whitespace/newline", + 4, + "An else should appear on the same line as the preceding }", + ) + else: + last_wrong = False + + # If braces come on one side of an else, they should be on both. + # However, we have to worry about "else if" that spans multiple lines! 
+ if re.search(r"else if\s*\(", line): # could be multi-line if + brace_on_left = bool(re.search(r"}\s*else if\s*\(", line)) + # find the ( after the if + pos = line.find("else if") + pos = line.find("(", pos) + if pos > 0: + (endline, _, endpos) = CloseExpression(clean_lines, linenum, pos) + brace_on_right = endline[endpos:].find("{") != -1 + if brace_on_left != brace_on_right: # must be brace after if + error( + filename, + linenum, + "readability/braces", + 5, + "If an else has a brace on one side, it should have it on both", + ) + # Prevent detection if statement has { and we detected an improper newline after } + elif re.search(r"}\s*else[^{]*$", line) or ( + re.match(r"[^}]*else\s*{", line) and not last_wrong + ): + error( + filename, + linenum, + "readability/braces", + 5, + "If an else has a brace on one side, it should have it on both", + ) + + # No control clauses with braces should have its contents on the same line + # Exclude } which will be covered by empty-block detect + # Exclude ; which may be used by while in a do-while + if keyword := re.search( + r"\b(else if|if|while|for|switch)" # These have parens + r"\s*\(.*\)\s*(?:\[\[(?:un)?likely\]\]\s*)?{\s*[^\s\\};]", + line, + ): + error( + filename, + linenum, + "whitespace/newline", + 5, + f"Controlled statements inside brackets of {keyword.group(1)} clause" + " should be on a separate line", + ) + elif keyword := re.search( + r"\b(else|do|try)" # These don't have parens + r"\s*(?:\[\[(?:un)?likely\]\]\s*)?{\s*[^\s\\}]", + line, + ): + error( + filename, + linenum, + "whitespace/newline", + 5, + f"Controlled statements inside brackets of {keyword.group(1)} clause" + " should be on a separate line", + ) + + # TODO: Err on if...else and do...while statements without braces; + # style guide has changed since the below comment was written + + # Check single-line if/else bodies. The style guide says 'curly braces are not + # required for single-line statements'. 
We additionally allow multi-line, + # single statements, but we reject anything with more than one semicolon in + # it. This means that the first semicolon after the if should be at the end of + # its line, and the line after that should have an indent level equal to or + # lower than the if. We also check for ambiguous if/else nesting without + # braces. + if_else_match = re.search(r"\b(if\s*(|constexpr)\s*\(|else\b)", line) + if if_else_match and not re.match(r"\s*#", line): + if_indent = GetIndentLevel(line) + endline, endlinenum, endpos = line, linenum, if_else_match.end() + if_match = re.search(r"\bif\s*(|constexpr)\s*\(", line) + if if_match: + # This could be a multiline if condition, so find the end first. + pos = if_match.end() - 1 + (endline, endlinenum, endpos) = CloseExpression(clean_lines, linenum, pos) + # Check for an opening brace, either directly after the if or on the next + # line. If found, this isn't a single-statement conditional. + if not re.match( + r"\s*(?:\[\[(?:un)?likely\]\]\s*)?{", endline[endpos:] + ) and not ( + re.match(r"\s*$", endline[endpos:]) + and endlinenum < (len(clean_lines.elided) - 1) + and re.match(r"\s*{", clean_lines.elided[endlinenum + 1]) + ): + while ( + endlinenum < len(clean_lines.elided) + and ";" not in clean_lines.elided[endlinenum][endpos:] + ): + endlinenum += 1 + endpos = 0 + if endlinenum < len(clean_lines.elided): + endline = clean_lines.elided[endlinenum] + # We allow a mix of whitespace and closing braces (e.g. for one-liner + # methods) and a single \ after the semicolon (for macros) + endpos = endline.find(";") + if not re.match(r";[\s}]*(\\?)$", endline[endpos:]): + # Semicolon isn't the last character, there's something trailing. + # Output a warning if the semicolon is not contained inside + # a lambda expression. 
+ if not re.match( + r"^[^{};]*\[[^\[\]]*\][^{}]*\{[^{}]*\}\s*\)*[;,]\s*$", endline + ): + error( + filename, + linenum, + "readability/braces", + 4, + "If/else bodies with multiple statements require braces", + ) + elif endlinenum < len(clean_lines.elided) - 1: + # Make sure the next line is dedented + next_line = clean_lines.elided[endlinenum + 1] + next_indent = GetIndentLevel(next_line) + # With ambiguous nested if statements, this will error out on the + # if that *doesn't* match the else, regardless of whether it's the + # inner one or outer one. + if ( + if_match + and re.match(r"\s*else\b", next_line) + and next_indent != if_indent + ): + error( + filename, + linenum, + "readability/braces", + 4, + "Else clause should be indented at the same level as if. " + "Ambiguous nested if/else chains require braces.", + ) + elif next_indent > if_indent: + error( + filename, + linenum, + "readability/braces", + 4, + "If/else bodies with multiple statements require braces", + ) def CheckTrailingSemicolon(filename, clean_lines, linenum, error): - """Looks for redundant trailing semicolon. - - Args: - filename: The name of the current file. - clean_lines: A CleansedLines instance containing the file. - linenum: The number of the line to check. - error: The function to call with any errors found. - """ - - line = clean_lines.elided[linenum] - - # Block bodies should not be followed by a semicolon. Due to C++11 - # brace initialization, there are more places where semicolons are - # required than not, so we explicitly list the allowed rules rather - # than listing the disallowed ones. These are the places where "};" - # should be replaced by just "}": - # 1. Some flavor of block following closing parenthesis: - # for (;;) {}; - # while (...) {}; - # switch (...) {}; - # Function(...) {}; - # if (...) {}; - # if (...) else if (...) {}; - # - # 2. else block: - # if (...) else {}; - # - # 3. const member function: - # Function(...) const {}; - # - # 4. 
Block following some statement: - # x = 42; - # {}; - # - # 5. Block at the beginning of a function: - # Function(...) { - # {}; - # } - # - # Note that naively checking for the preceding "{" will also match - # braces inside multi-dimensional arrays, but this is fine since - # that expression will not contain semicolons. - # - # 6. Block following another block: - # while (true) {} - # {}; - # - # 7. End of namespaces: - # namespace {}; - # - # These semicolons seems far more common than other kinds of - # redundant semicolons, possibly due to people converting classes - # to namespaces. For now we do not warn for this case. - # - # Try matching case 1 first. - match = re.match(r'^(.*\)\s*)\{', line) - if match: - # Matched closing parenthesis (case 1). Check the token before the - # matching opening parenthesis, and don't warn if it looks like a - # macro. This avoids these false positives: - # - macro that defines a base class - # - multi-line macro that defines a base class - # - macro that defines the whole class-head + """Looks for redundant trailing semicolon. + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + error: The function to call with any errors found. + """ + + line = clean_lines.elided[linenum] + + # Block bodies should not be followed by a semicolon. Due to C++11 + # brace initialization, there are more places where semicolons are + # required than not, so we explicitly list the allowed rules rather + # than listing the disallowed ones. These are the places where "};" + # should be replaced by just "}": + # 1. Some flavor of block following closing parenthesis: + # for (;;) {}; + # while (...) {}; + # switch (...) {}; + # Function(...) {}; + # if (...) {}; + # if (...) else if (...) 
{}; # - # But we still issue warnings for macros that we know are safe to - # warn, specifically: - # - TEST, TEST_F, TEST_P, MATCHER, MATCHER_P - # - TYPED_TEST - # - INTERFACE_DEF - # - EXCLUSIVE_LOCKS_REQUIRED, SHARED_LOCKS_REQUIRED, LOCKS_EXCLUDED: + # 2. else block: + # if (...) else {}; # - # We implement a list of safe macros instead of a list of - # unsafe macros, even though the latter appears less frequently in - # google code and would have been easier to implement. This is because - # the downside for getting the allowed checks wrong means some extra - # semicolons, while the downside for getting disallowed checks wrong - # would result in compile errors. + # 3. const member function: + # Function(...) const {}; # - # In addition to macros, we also don't want to warn on - # - Compound literals - # - Lambdas - # - alignas specifier with anonymous structs - # - decltype - closing_brace_pos = match.group(1).rfind(')') - opening_parenthesis = ReverseCloseExpression( - clean_lines, linenum, closing_brace_pos) - if opening_parenthesis[2] > -1: - line_prefix = opening_parenthesis[0][0:opening_parenthesis[2]] - macro = re.search(r'\b([A-Z_][A-Z0-9_]*)\s*$', line_prefix) - func = re.match(r'^(.*\])\s*$', line_prefix) - if ((macro and - macro.group(1) not in ( - 'TEST', 'TEST_F', 'MATCHER', 'MATCHER_P', 'TYPED_TEST', - 'EXCLUSIVE_LOCKS_REQUIRED', 'SHARED_LOCKS_REQUIRED', - 'LOCKS_EXCLUDED', 'INTERFACE_DEF')) or - (func and not re.search(r'\boperator\s*\[\s*\]', func.group(1))) or - re.search(r'\b(?:struct|union)\s+alignas\s*$', line_prefix) or - re.search(r'\bdecltype$', line_prefix) or - re.search(r'\s+=\s*$', line_prefix)): - match = None - if (match and - opening_parenthesis[1] > 1 and - re.search(r'\]\s*$', clean_lines.elided[opening_parenthesis[1] - 1])): - # Multi-line lambda-expression - match = None - - else: - # Try matching cases 2-3. - match = re.match(r'^(.*(?:else|\)\s*const)\s*)\{', line) - if not match: - # Try matching cases 4-6. 
These are always matched on separate lines. - # - # Note that we can't simply concatenate the previous line to the - # current line and do a single match, otherwise we may output - # duplicate warnings for the blank line case: - # if (cond) { - # // blank line - # } - prevline = GetPreviousNonBlankLine(clean_lines, linenum)[0] - if prevline and re.search(r'[;{}]\s*$', prevline): - match = re.match(r'^(\s*)\{', line) - - # Check matching closing brace - if match: - (endline, endlinenum, endpos) = CloseExpression( - clean_lines, linenum, len(match.group(1))) - if endpos > -1 and re.match(r'^\s*;', endline[endpos:]): - # Current {} pair is eligible for semicolon check, and we have found - # the redundant semicolon, output warning here. - # - # Note: because we are scanning forward for opening braces, and - # outputting warnings for the matching closing brace, if there are - # nested blocks with trailing semicolons, we will get the error - # messages in reversed order. - - # We need to check the line forward for NOLINT - raw_lines = clean_lines.raw_lines - ParseNolintSuppressions(filename, raw_lines[endlinenum-1], endlinenum-1, - error) - ParseNolintSuppressions(filename, raw_lines[endlinenum], endlinenum, - error) - - error(filename, endlinenum, 'readability/braces', 4, - "You don't need a ; after a }") + # 4. Block following some statement: + # x = 42; + # {}; + # + # 5. Block at the beginning of a function: + # Function(...) { + # {}; + # } + # + # Note that naively checking for the preceding "{" will also match + # braces inside multi-dimensional arrays, but this is fine since + # that expression will not contain semicolons. + # + # 6. Block following another block: + # while (true) {} + # {}; + # + # 7. End of namespaces: + # namespace {}; + # + # These semicolons seems far more common than other kinds of + # redundant semicolons, possibly due to people converting classes + # to namespaces. For now we do not warn for this case. + # + # Try matching case 1 first. 
+ match = re.match(r"^(.*\)\s*)\{", line) + if match: + # Matched closing parenthesis (case 1). Check the token before the + # matching opening parenthesis, and don't warn if it looks like a + # macro. This avoids these false positives: + # - macro that defines a base class + # - multi-line macro that defines a base class + # - macro that defines the whole class-head + # + # But we still issue warnings for macros that we know are safe to + # warn, specifically: + # - TEST, TEST_F, TEST_P, MATCHER, MATCHER_P + # - TYPED_TEST + # - INTERFACE_DEF + # - EXCLUSIVE_LOCKS_REQUIRED, SHARED_LOCKS_REQUIRED, LOCKS_EXCLUDED: + # + # We implement a list of safe macros instead of a list of + # unsafe macros, even though the latter appears less frequently in + # google code and would have been easier to implement. This is because + # the downside for getting the allowed checks wrong means some extra + # semicolons, while the downside for getting disallowed checks wrong + # would result in compile errors. 
+ # + # In addition to macros, we also don't want to warn on + # - Compound literals + # - Lambdas + # - alignas specifier with anonymous structs + # - decltype + closing_brace_pos = match.group(1).rfind(")") + opening_parenthesis = ReverseCloseExpression( + clean_lines, linenum, closing_brace_pos + ) + if opening_parenthesis[2] > -1: + line_prefix = opening_parenthesis[0][0 : opening_parenthesis[2]] + macro = re.search(r"\b([A-Z_][A-Z0-9_]*)\s*$", line_prefix) + func = re.match(r"^(.*\])\s*$", line_prefix) + if ( + ( + macro + and macro.group(1) + not in ( + "TEST", + "TEST_F", + "MATCHER", + "MATCHER_P", + "TYPED_TEST", + "EXCLUSIVE_LOCKS_REQUIRED", + "SHARED_LOCKS_REQUIRED", + "LOCKS_EXCLUDED", + "INTERFACE_DEF", + ) + ) + or (func and not re.search(r"\boperator\s*\[\s*\]", func.group(1))) + or re.search(r"\b(?:struct|union)\s+alignas\s*$", line_prefix) + or re.search(r"\bdecltype$", line_prefix) + or re.search(r"\s+=\s*$", line_prefix) + ): + match = None + if ( + match + and opening_parenthesis[1] > 1 + and re.search(r"\]\s*$", clean_lines.elided[opening_parenthesis[1] - 1]) + ): + # Multi-line lambda-expression + match = None + + else: + # Try matching cases 2-3. + match = re.match(r"^(.*(?:else|\)\s*const)\s*)\{", line) + if not match: + # Try matching cases 4-6. These are always matched on separate lines. 
+ # + # Note that we can't simply concatenate the previous line to the + # current line and do a single match, otherwise we may output + # duplicate warnings for the blank line case: + # if (cond) { + # // blank line + # } + prevline = GetPreviousNonBlankLine(clean_lines, linenum)[0] + if prevline and re.search(r"[;{}]\s*$", prevline): + match = re.match(r"^(\s*)\{", line) + + # Check matching closing brace + if match: + (endline, endlinenum, endpos) = CloseExpression( + clean_lines, linenum, len(match.group(1)) + ) + if endpos > -1 and re.match(r"^\s*;", endline[endpos:]): + # Current {} pair is eligible for semicolon check, and we have found + # the redundant semicolon, output warning here. + # + # Note: because we are scanning forward for opening braces, and + # outputting warnings for the matching closing brace, if there are + # nested blocks with trailing semicolons, we will get the error + # messages in reversed order. + + # We need to check the line forward for NOLINT + raw_lines = clean_lines.raw_lines + ParseNolintSuppressions( + filename, raw_lines[endlinenum - 1], endlinenum - 1, error + ) + ParseNolintSuppressions(filename, raw_lines[endlinenum], endlinenum, error) + + error( + filename, + endlinenum, + "readability/braces", + 4, + "You don't need a ; after a }", + ) def CheckEmptyBlockBody(filename, clean_lines, linenum, error): - """Look for empty loop/conditional body with only a single semicolon. - - Args: - filename: The name of the current file. - clean_lines: A CleansedLines instance containing the file. - linenum: The number of the line to check. - error: The function to call with any errors found. - """ - - # Search for loop keywords at the beginning of the line. Because only - # whitespaces are allowed before the keywords, this will also ignore most - # do-while-loops, since those lines should start with closing brace. - # - # We also check "if" blocks here, since an empty conditional block - # is likely an error. 
- line = clean_lines.elided[linenum] - matched = re.match(r'\s*(for|while|if)\s*\(', line) - if matched: - # Find the end of the conditional expression. - (end_line, end_linenum, end_pos) = CloseExpression( - clean_lines, linenum, line.find('(')) - - # Output warning if what follows the condition expression is a semicolon. - # No warning for all other cases, including whitespace or newline, since we - # have a separate check for semicolons preceded by whitespace. - if end_pos >= 0 and re.match(r';', end_line[end_pos:]): - if matched.group(1) == 'if': - error(filename, end_linenum, 'whitespace/empty_conditional_body', 5, - 'Empty conditional bodies should use {}') - else: - error(filename, end_linenum, 'whitespace/empty_loop_body', 5, - 'Empty loop bodies should use {} or continue') - - # Check for if statements that have completely empty bodies (no comments) - # and no else clauses. - if end_pos >= 0 and matched.group(1) == 'if': - # Find the position of the opening { for the if statement. - # Return without logging an error if it has no brackets. - opening_linenum = end_linenum - opening_line_fragment = end_line[end_pos:] - # Loop until EOF or find anything that's not whitespace or opening {. - while not re.search(r'^\s*\{', opening_line_fragment): - if re.search(r'^(?!\s*$)', opening_line_fragment): - # Conditional has no brackets. - return - opening_linenum += 1 - if opening_linenum == len(clean_lines.elided): - # Couldn't find conditional's opening { or any code before EOF. - return - opening_line_fragment = clean_lines.elided[opening_linenum] - # Set opening_line (opening_line_fragment may not be entire opening line). - opening_line = clean_lines.elided[opening_linenum] - - # Find the position of the closing }. - opening_pos = opening_line_fragment.find('{') - if opening_linenum == end_linenum: - # We need to make opening_pos relative to the start of the entire line. 
- opening_pos += end_pos - (closing_line, closing_linenum, closing_pos) = CloseExpression( - clean_lines, opening_linenum, opening_pos) - if closing_pos < 0: - return + """Look for empty loop/conditional body with only a single semicolon. - # Now construct the body of the conditional. This consists of the portion - # of the opening line after the {, all lines until the closing line, - # and the portion of the closing line before the }. - if (clean_lines.raw_lines[opening_linenum] != - CleanseComments(clean_lines.raw_lines[opening_linenum])): - # Opening line ends with a comment, so conditional isn't empty. - return - if closing_linenum > opening_linenum: - # Opening line after the {. Ignore comments here since we checked above. - bodylist = list(opening_line[opening_pos+1:]) - # All lines until closing line, excluding closing line, with comments. - bodylist.extend(clean_lines.raw_lines[opening_linenum+1:closing_linenum]) - # Closing line before the }. Won't (and can't) have comments. - bodylist.append(clean_lines.elided[closing_linenum][:closing_pos-1]) - body = '\n'.join(bodylist) - else: - # If statement has brackets and fits on a single line. - body = opening_line[opening_pos+1:closing_pos-1] - - # Check if the body is empty - if not _EMPTY_CONDITIONAL_BODY_PATTERN.search(body): - return - # The body is empty. Now make sure there's not an else clause. - current_linenum = closing_linenum - current_line_fragment = closing_line[closing_pos:] - # Loop until EOF or find anything that's not whitespace or else clause. - while re.search(r'^\s*$|^(?=\s*else)', current_line_fragment): - if re.search(r'^(?=\s*else)', current_line_fragment): - # Found an else clause, so don't log an error. - return - current_linenum += 1 - if current_linenum == len(clean_lines.elided): - break - current_line_fragment = clean_lines.elided[current_linenum] - - # The body is empty and there's no else clause until EOF or other code. 
- error(filename, end_linenum, 'whitespace/empty_if_body', 4, - ('If statement had no body and no else clause')) + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + error: The function to call with any errors found. + """ + + # Search for loop keywords at the beginning of the line. Because only + # whitespaces are allowed before the keywords, this will also ignore most + # do-while-loops, since those lines should start with closing brace. + # + # We also check "if" blocks here, since an empty conditional block + # is likely an error. + line = clean_lines.elided[linenum] + matched = re.match(r"\s*(for|while|if)\s*\(", line) + if matched: + # Find the end of the conditional expression. + (end_line, end_linenum, end_pos) = CloseExpression( + clean_lines, linenum, line.find("(") + ) + + # Output warning if what follows the condition expression is a semicolon. + # No warning for all other cases, including whitespace or newline, since we + # have a separate check for semicolons preceded by whitespace. + if end_pos >= 0 and re.match(r";", end_line[end_pos:]): + if matched.group(1) == "if": + error( + filename, + end_linenum, + "whitespace/empty_conditional_body", + 5, + "Empty conditional bodies should use {}", + ) + else: + error( + filename, + end_linenum, + "whitespace/empty_loop_body", + 5, + "Empty loop bodies should use {} or continue", + ) + + # Check for if statements that have completely empty bodies (no comments) + # and no else clauses. + if end_pos >= 0 and matched.group(1) == "if": + # Find the position of the opening { for the if statement. + # Return without logging an error if it has no brackets. + opening_linenum = end_linenum + opening_line_fragment = end_line[end_pos:] + # Loop until EOF or find anything that's not whitespace or opening {. 
+ while not re.search(r"^\s*\{", opening_line_fragment): + if re.search(r"^(?!\s*$)", opening_line_fragment): + # Conditional has no brackets. + return + opening_linenum += 1 + if opening_linenum == len(clean_lines.elided): + # Couldn't find conditional's opening { or any code before EOF. + return + opening_line_fragment = clean_lines.elided[opening_linenum] + # Set opening_line (opening_line_fragment may not be entire opening line). + opening_line = clean_lines.elided[opening_linenum] + + # Find the position of the closing }. + opening_pos = opening_line_fragment.find("{") + if opening_linenum == end_linenum: + # We need to make opening_pos relative to the start of the entire line. + opening_pos += end_pos + (closing_line, closing_linenum, closing_pos) = CloseExpression( + clean_lines, opening_linenum, opening_pos + ) + if closing_pos < 0: + return + + # Now construct the body of the conditional. This consists of the portion + # of the opening line after the {, all lines until the closing line, + # and the portion of the closing line before the }. + if clean_lines.raw_lines[opening_linenum] != CleanseComments( + clean_lines.raw_lines[opening_linenum] + ): + # Opening line ends with a comment, so conditional isn't empty. + return + if closing_linenum > opening_linenum: + # Opening line after the {. Ignore comments here since we checked above. + bodylist = list(opening_line[opening_pos + 1 :]) + # All lines until closing line, excluding closing line, with comments. + bodylist.extend( + clean_lines.raw_lines[opening_linenum + 1 : closing_linenum] + ) + # Closing line before the }. Won't (and can't) have comments. + bodylist.append(clean_lines.elided[closing_linenum][: closing_pos - 1]) + body = "\n".join(bodylist) + else: + # If statement has brackets and fits on a single line. + body = opening_line[opening_pos + 1 : closing_pos - 1] + + # Check if the body is empty + if not _EMPTY_CONDITIONAL_BODY_PATTERN.search(body): + return + # The body is empty. 
Now make sure there's not an else clause. + current_linenum = closing_linenum + current_line_fragment = closing_line[closing_pos:] + # Loop until EOF or find anything that's not whitespace or else clause. + while re.search(r"^\s*$|^(?=\s*else)", current_line_fragment): + if re.search(r"^(?=\s*else)", current_line_fragment): + # Found an else clause, so don't log an error. + return + current_linenum += 1 + if current_linenum == len(clean_lines.elided): + break + current_line_fragment = clean_lines.elided[current_linenum] + + # The body is empty and there's no else clause until EOF or other code. + error( + filename, + end_linenum, + "whitespace/empty_if_body", + 4, + ("If statement had no body and no else clause"), + ) def FindCheckMacro(line): - """Find a replaceable CHECK-like macro. - - Args: - line: line to search on. - Returns: - (macro name, start position), or (None, -1) if no replaceable - macro is found. - """ - for macro in _CHECK_MACROS: - i = line.find(macro) - if i >= 0: - # Find opening parenthesis. Do a regular expression match here - # to make sure that we are matching the expected CHECK macro, as - # opposed to some other macro that happens to contain the CHECK - # substring. - matched = re.match(r'^(.*\b' + macro + r'\s*)\(', line) - if not matched: - continue - return (macro, len(matched.group(1))) - return (None, -1) + """Find a replaceable CHECK-like macro. + + Args: + line: line to search on. + Returns: + (macro name, start position), or (None, -1) if no replaceable + macro is found. + """ + for macro in _CHECK_MACROS: + i = line.find(macro) + if i >= 0: + # Find opening parenthesis. Do a regular expression match here + # to make sure that we are matching the expected CHECK macro, as + # opposed to some other macro that happens to contain the CHECK + # substring. 
+ matched = re.match(r"^(.*\b" + macro + r"\s*)\(", line) + if not matched: + continue + return (macro, len(matched.group(1))) + return (None, -1) def CheckCheck(filename, clean_lines, linenum, error): - """Checks the use of CHECK and EXPECT macros. - - Args: - filename: The name of the current file. - clean_lines: A CleansedLines instance containing the file. - linenum: The number of the line to check. - error: The function to call with any errors found. - """ - - # Decide the set of replacement macros that should be suggested - lines = clean_lines.elided - (check_macro, start_pos) = FindCheckMacro(lines[linenum]) - if not check_macro: - return + """Checks the use of CHECK and EXPECT macros. - # Find end of the boolean expression by matching parentheses - (last_line, end_line, end_pos) = CloseExpression( - clean_lines, linenum, start_pos) - if end_pos < 0: - return + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + error: The function to call with any errors found. + """ - # If the check macro is followed by something other than a - # semicolon, assume users will log their own custom error messages - # and don't suggest any replacements. - if not re.match(r'\s*;', last_line[end_pos:]): - return + # Decide the set of replacement macros that should be suggested + lines = clean_lines.elided + (check_macro, start_pos) = FindCheckMacro(lines[linenum]) + if not check_macro: + return - if linenum == end_line: - expression = lines[linenum][start_pos + 1:end_pos - 1] - else: - expression = lines[linenum][start_pos + 1:] - for i in range(linenum + 1, end_line): - expression += lines[i] - expression += last_line[0:end_pos - 1] - - # Parse expression so that we can take parentheses into account. - # This avoids false positives for inputs like "CHECK((a < 4) == b)", - # which is not replaceable by CHECK_LE. 
- lhs = '' - rhs = '' - operator = None - while expression: - matched = re.match(r'^\s*(<<|<<=|>>|>>=|->\*|->|&&|\|\||' - r'==|!=|>=|>|<=|<|\()(.*)$', expression) - if matched: - token = matched.group(1) - if token == '(': - # Parenthesized operand - expression = matched.group(2) - (end, _) = FindEndOfExpressionInLine(expression, 0, ['(']) - if end < 0: - return # Unmatched parenthesis - lhs += '(' + expression[0:end] - expression = expression[end:] - elif token in ('&&', '||'): - # Logical and/or operators. This means the expression - # contains more than one term, for example: - # CHECK(42 < a && a < b); - # - # These are not replaceable with CHECK_LE, so bail out early. + # Find end of the boolean expression by matching parentheses + (last_line, end_line, end_pos) = CloseExpression(clean_lines, linenum, start_pos) + if end_pos < 0: + return + + # If the check macro is followed by something other than a + # semicolon, assume users will log their own custom error messages + # and don't suggest any replacements. + if not re.match(r"\s*;", last_line[end_pos:]): return - elif token in ('<<', '<<=', '>>', '>>=', '->*', '->'): - # Non-relational operator - lhs += token - expression = matched.group(2) - else: - # Relational operator - operator = token - rhs = matched.group(2) - break + + if linenum == end_line: + expression = lines[linenum][start_pos + 1 : end_pos - 1] else: - # Unparenthesized operand. Instead of appending to lhs one character - # at a time, we do another regular expression match to consume several - # characters at once if possible. Trivial benchmark shows that this - # is more efficient when the operands are longer than a single - # character, which is generally the case. 
- matched = re.match(r'^([^-=!<>()&|]+)(.*)$', expression) - if not matched: - matched = re.match(r'^(\s*\S)(.*)$', expression) - if not matched: - break - lhs += matched.group(1) - expression = matched.group(2) - - # Only apply checks if we got all parts of the boolean expression - if not (lhs and operator and rhs): - return + expression = lines[linenum][start_pos + 1 :] + for i in range(linenum + 1, end_line): + expression += lines[i] + expression += last_line[0 : end_pos - 1] + + # Parse expression so that we can take parentheses into account. + # This avoids false positives for inputs like "CHECK((a < 4) == b)", + # which is not replaceable by CHECK_LE. + lhs = "" + rhs = "" + operator = None + while expression: + matched = re.match( + r"^\s*(<<|<<=|>>|>>=|->\*|->|&&|\|\||" r"==|!=|>=|>|<=|<|\()(.*)$", + expression, + ) + if matched: + token = matched.group(1) + if token == "(": + # Parenthesized operand + expression = matched.group(2) + (end, _) = FindEndOfExpressionInLine(expression, 0, ["("]) + if end < 0: + return # Unmatched parenthesis + lhs += "(" + expression[0:end] + expression = expression[end:] + elif token in ("&&", "||"): + # Logical and/or operators. This means the expression + # contains more than one term, for example: + # CHECK(42 < a && a < b); + # + # These are not replaceable with CHECK_LE, so bail out early. + return + elif token in ("<<", "<<=", ">>", ">>=", "->*", "->"): + # Non-relational operator + lhs += token + expression = matched.group(2) + else: + # Relational operator + operator = token + rhs = matched.group(2) + break + else: + # Unparenthesized operand. Instead of appending to lhs one character + # at a time, we do another regular expression match to consume several + # characters at once if possible. Trivial benchmark shows that this + # is more efficient when the operands are longer than a single + # character, which is generally the case. 
+ matched = re.match(r"^([^-=!<>()&|]+)(.*)$", expression) + if not matched: + matched = re.match(r"^(\s*\S)(.*)$", expression) + if not matched: + break + lhs += matched.group(1) + expression = matched.group(2) + + # Only apply checks if we got all parts of the boolean expression + if not (lhs and operator and rhs): + return - # Check that rhs do not contain logical operators. We already know - # that lhs is fine since the loop above parses out && and ||. - if rhs.find('&&') > -1 or rhs.find('||') > -1: - return + # Check that rhs do not contain logical operators. We already know + # that lhs is fine since the loop above parses out && and ||. + if rhs.find("&&") > -1 or rhs.find("||") > -1: + return - # At least one of the operands must be a constant literal. This is - # to avoid suggesting replacements for unprintable things like - # CHECK(variable != iterator) - # - # The following pattern matches decimal, hex integers, strings, and - # characters (in that order). - lhs = lhs.strip() - rhs = rhs.strip() - match_constant = r'^([-+]?(\d+|0[xX][0-9a-fA-F]+)[lLuU]{0,3}|".*"|\'.*\')$' - if re.match(match_constant, lhs) or re.match(match_constant, rhs): - # Note: since we know both lhs and rhs, we can provide a more - # descriptive error message like: - # Consider using CHECK_EQ(x, 42) instead of CHECK(x == 42) - # Instead of: - # Consider using CHECK_EQ instead of CHECK(a == b) + # At least one of the operands must be a constant literal. This is + # to avoid suggesting replacements for unprintable things like + # CHECK(variable != iterator) # - # We are still keeping the less descriptive message because if lhs - # or rhs gets long, the error message might become unreadable. - error(filename, linenum, 'readability/check', 2, - f'Consider using {_CHECK_REPLACEMENT[check_macro][operator]}' - f' instead of {check_macro}(a {operator} b)') + # The following pattern matches decimal, hex integers, strings, and + # characters (in that order). 
+ lhs = lhs.strip() + rhs = rhs.strip() + match_constant = r'^([-+]?(\d+|0[xX][0-9a-fA-F]+)[lLuU]{0,3}|".*"|\'.*\')$' + if re.match(match_constant, lhs) or re.match(match_constant, rhs): + # Note: since we know both lhs and rhs, we can provide a more + # descriptive error message like: + # Consider using CHECK_EQ(x, 42) instead of CHECK(x == 42) + # Instead of: + # Consider using CHECK_EQ instead of CHECK(a == b) + # + # We are still keeping the less descriptive message because if lhs + # or rhs gets long, the error message might become unreadable. + error( + filename, + linenum, + "readability/check", + 2, + f"Consider using {_CHECK_REPLACEMENT[check_macro][operator]}" + f" instead of {check_macro}(a {operator} b)", + ) def CheckAltTokens(filename, clean_lines, linenum, error): - """Check alternative keywords being used in boolean expressions. - - Args: - filename: The name of the current file. - clean_lines: A CleansedLines instance containing the file. - linenum: The number of the line to check. - error: The function to call with any errors found. - """ - line = clean_lines.elided[linenum] - - # Avoid preprocessor lines - if re.match(r'^\s*#', line): - return + """Check alternative keywords being used in boolean expressions. - # Last ditch effort to avoid multi-line comments. This will not help - # if the comment started before the current line or ended after the - # current line, but it catches most of the false positives. At least, - # it provides a way to workaround this warning for people who use - # multi-line comments in preprocessor macros. - # - # TODO(unknown): remove this once cpplint has better support for - # multi-line comments. - if line.find('/*') >= 0 or line.find('*/') >= 0: - return + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + error: The function to call with any errors found. 
+ """ + line = clean_lines.elided[linenum] + + # Avoid preprocessor lines + if re.match(r"^\s*#", line): + return - for match in _ALT_TOKEN_REPLACEMENT_PATTERN.finditer(line): - error(filename, linenum, 'readability/alt_tokens', 2, - f'Use operator {_ALT_TOKEN_REPLACEMENT[match.group(2)]}' - f' instead of {match.group(2)}') + # Last ditch effort to avoid multi-line comments. This will not help + # if the comment started before the current line or ended after the + # current line, but it catches most of the false positives. At least, + # it provides a way to workaround this warning for people who use + # multi-line comments in preprocessor macros. + # + # TODO(unknown): remove this once cpplint has better support for + # multi-line comments. + if line.find("/*") >= 0 or line.find("*/") >= 0: + return + + for match in _ALT_TOKEN_REPLACEMENT_PATTERN.finditer(line): + error( + filename, + linenum, + "readability/alt_tokens", + 2, + f"Use operator {_ALT_TOKEN_REPLACEMENT[match.group(2)]}" + f" instead of {match.group(2)}", + ) def GetLineWidth(line): - """Determines the width of the line in column positions. - - Args: - line: A string, which may be a Unicode string. - - Returns: - The width of the line in column positions, accounting for Unicode - combining characters and wide characters. 
- """ - if isinstance(line, str): - width = 0 - for uc in unicodedata.normalize('NFC', line): - if unicodedata.east_asian_width(uc) in ('W', 'F'): - width += 2 - elif not unicodedata.combining(uc): - # Issue 337 - # https://mail.python.org/pipermail/python-list/2012-August/628809.html - if (sys.version_info.major, sys.version_info.minor) <= (3, 2): - # https://github.com/python/cpython/blob/2.7/Include/unicodeobject.h#L81 - is_wide_build = sysconfig.get_config_var("Py_UNICODE_SIZE") >= 4 - # https://github.com/python/cpython/blob/2.7/Objects/unicodeobject.c#L564 - is_low_surrogate = 0xDC00 <= ord(uc) <= 0xDFFF - if not is_wide_build and is_low_surrogate: - width -= 1 - - width += 1 - return width - else: - return len(line) - - -def CheckStyle(filename, clean_lines, linenum, file_extension, nesting_state, - error): - """Checks rules from the 'C++ style rules' section of cppguide.html. - - Most of these rules are hard to test (naming, comment style), but we - do what we can. In particular we check for 2-space indents, line lengths, - tab usage, spaces inside code, etc. - - Args: - filename: The name of the current file. - clean_lines: A CleansedLines instance containing the file. - linenum: The number of the line to check. - file_extension: The extension (without the dot) of the filename. - nesting_state: A NestingState instance which maintains information about - the current stack of nested blocks being parsed. - error: The function to call with any errors found. - """ - - # Don't use "elided" lines here, otherwise we can't check commented lines. 
- # Don't want to use "raw" either, because we don't want to check inside C++11 - # raw strings, - raw_lines = clean_lines.lines_without_raw_strings - line = raw_lines[linenum] - prev = raw_lines[linenum - 1] if linenum > 0 else '' - - if line.find('\t') != -1: - error(filename, linenum, 'whitespace/tab', 1, - 'Tab found; better to use spaces') - - # One or three blank spaces at the beginning of the line is weird; it's - # hard to reconcile that with 2-space indents. - # NOTE: here are the conditions rob pike used for his tests. Mine aren't - # as sophisticated, but it may be worth becoming so: RLENGTH==initial_spaces - # if(RLENGTH > 20) complain = 0; - # if(match($0, " +(error|private|public|protected):")) complain = 0; - # if(match(prev, "&& *$")) complain = 0; - # if(match(prev, "\\|\\| *$")) complain = 0; - # if(match(prev, "[\",=><] *$")) complain = 0; - # if(match($0, " <<")) complain = 0; - # if(match(prev, " +for \\(")) complain = 0; - # if(prevodd && match(prevprev, " +for \\(")) complain = 0; - scope_or_label_pattern = r'\s*(?:public|private|protected|signals)(?:\s+(?:slots\s*)?)?:\s*\\?$' - classinfo = nesting_state.InnermostClass() - initial_spaces = 0 - cleansed_line = clean_lines.elided[linenum] - while initial_spaces < len(line) and line[initial_spaces] == ' ': - initial_spaces += 1 - # There are certain situations we allow one space, notably for - # section labels, and also lines containing multi-line raw strings. - # We also don't check for lines that look like continuation lines - # (of lines ending in double quotes, commas, equals, or angle brackets) - # because the rules for how to indent those are non-trivial. - if (not re.search(r'[",=><] *$', prev) and - (initial_spaces == 1 or initial_spaces == 3) and - not re.match(scope_or_label_pattern, cleansed_line) and - not (clean_lines.raw_lines[linenum] != line and - re.match(r'^\s*""', line))): - error(filename, linenum, 'whitespace/indent', 3, - 'Weird number of spaces at line-start. 
' - 'Are you using a 2-space indent?') - - if line and line[-1].isspace(): - error(filename, linenum, 'whitespace/end_of_line', 4, - 'Line ends in whitespace. Consider deleting these extra spaces.') - - # Check if the line is a header guard. - is_header_guard = False - if IsHeaderExtension(file_extension): - cppvar = GetHeaderGuardCPPVariable(filename) - if (line.startswith(f'#ifndef {cppvar}') or - line.startswith(f'#define {cppvar}') or - line.startswith(f'#endif // {cppvar}')): - is_header_guard = True - # #include lines and header guards can be long, since there's no clean way to - # split them. - # - # URLs can be long too. It's possible to split these, but it makes them - # harder to cut&paste. - # - # The "$Id:...$" comment may also get very long without it being the - # developers fault. - # - # Doxygen documentation copying can get pretty long when using an overloaded - # function declaration - if (not line.startswith('#include') and not is_header_guard and - not re.match(r'^\s*//.*http(s?)://\S*$', line) and - not re.match(r'^\s*//\s*[^\s]*$', line) and - not re.match(r'^// \$Id:.*#[0-9]+ \$$', line) and - not re.match(r'^\s*/// [@\\](copydoc|copydetails|copybrief) .*$', line)): - line_width = GetLineWidth(line) - if line_width > _line_length: - error(filename, linenum, 'whitespace/line_length', 2, - f'Lines should be <= {_line_length} characters long') - - if (cleansed_line.count(';') > 1 and - # allow simple single line lambdas - not re.match(r'^[^{};]*\[[^\[\]]*\][^{}]*\{[^{}\n\r]*\}', - line) and - # for loops are allowed two ;'s (and may run over two lines). 
- cleansed_line.find('for') == -1 and - (GetPreviousNonBlankLine(clean_lines, linenum)[0].find('for') == -1 or - GetPreviousNonBlankLine(clean_lines, linenum)[0].find(';') != -1) and - # It's ok to have many commands in a switch case that fits in 1 line - not ((cleansed_line.find('case ') != -1 or - cleansed_line.find('default:') != -1) and - cleansed_line.find('break;') != -1)): - error(filename, linenum, 'whitespace/newline', 0, - 'More than one command on the same line') - - # Some more style checks - CheckBraces(filename, clean_lines, linenum, error) - CheckTrailingSemicolon(filename, clean_lines, linenum, error) - CheckEmptyBlockBody(filename, clean_lines, linenum, error) - CheckSpacing(filename, clean_lines, linenum, nesting_state, error) - CheckOperatorSpacing(filename, clean_lines, linenum, error) - CheckParenthesisSpacing(filename, clean_lines, linenum, error) - CheckCommaSpacing(filename, clean_lines, linenum, error) - CheckBracesSpacing(filename, clean_lines, linenum, nesting_state, error) - CheckSpacingForFunctionCall(filename, clean_lines, linenum, error) - CheckCheck(filename, clean_lines, linenum, error) - CheckAltTokens(filename, clean_lines, linenum, error) - classinfo = nesting_state.InnermostClass() - if classinfo: - CheckSectionSpacing(filename, clean_lines, classinfo, linenum, error) + """Determines the width of the line in column positions. + + Args: + line: A string, which may be a Unicode string. + + Returns: + The width of the line in column positions, accounting for Unicode + combining characters and wide characters. 
+ """ + if isinstance(line, str): + width = 0 + for uc in unicodedata.normalize("NFC", line): + if unicodedata.east_asian_width(uc) in ("W", "F"): + width += 2 + elif not unicodedata.combining(uc): + # Issue 337 + # https://mail.python.org/pipermail/python-list/2012-August/628809.html + if (sys.version_info.major, sys.version_info.minor) <= (3, 2): + # https://github.com/python/cpython/blob/2.7/Include/unicodeobject.h#L81 + is_wide_build = sysconfig.get_config_var("Py_UNICODE_SIZE") >= 4 + # https://github.com/python/cpython/blob/2.7/Objects/unicodeobject.c#L564 + is_low_surrogate = 0xDC00 <= ord(uc) <= 0xDFFF + if not is_wide_build and is_low_surrogate: + width -= 1 + + width += 1 + return width + else: + return len(line) + + +def CheckStyle(filename, clean_lines, linenum, file_extension, nesting_state, error): + """Checks rules from the 'C++ style rules' section of cppguide.html. + + Most of these rules are hard to test (naming, comment style), but we + do what we can. In particular we check for 2-space indents, line lengths, + tab usage, spaces inside code, etc. + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + file_extension: The extension (without the dot) of the filename. + nesting_state: A NestingState instance which maintains information about + the current stack of nested blocks being parsed. + error: The function to call with any errors found. + """ + + # Don't use "elided" lines here, otherwise we can't check commented lines. 
+ # Don't want to use "raw" either, because we don't want to check inside C++11 + # raw strings, + raw_lines = clean_lines.lines_without_raw_strings + line = raw_lines[linenum] + prev = raw_lines[linenum - 1] if linenum > 0 else "" + + if line.find("\t") != -1: + error(filename, linenum, "whitespace/tab", 1, "Tab found; better to use spaces") + + # One or three blank spaces at the beginning of the line is weird; it's + # hard to reconcile that with 2-space indents. + # NOTE: here are the conditions rob pike used for his tests. Mine aren't + # as sophisticated, but it may be worth becoming so: RLENGTH==initial_spaces + # if(RLENGTH > 20) complain = 0; + # if(match($0, " +(error|private|public|protected):")) complain = 0; + # if(match(prev, "&& *$")) complain = 0; + # if(match(prev, "\\|\\| *$")) complain = 0; + # if(match(prev, "[\",=><] *$")) complain = 0; + # if(match($0, " <<")) complain = 0; + # if(match(prev, " +for \\(")) complain = 0; + # if(prevodd && match(prevprev, " +for \\(")) complain = 0; + scope_or_label_pattern = ( + r"\s*(?:public|private|protected|signals)(?:\s+(?:slots\s*)?)?:\s*\\?$" + ) + classinfo = nesting_state.InnermostClass() + initial_spaces = 0 + cleansed_line = clean_lines.elided[linenum] + while initial_spaces < len(line) and line[initial_spaces] == " ": + initial_spaces += 1 + # There are certain situations we allow one space, notably for + # section labels, and also lines containing multi-line raw strings. + # We also don't check for lines that look like continuation lines + # (of lines ending in double quotes, commas, equals, or angle brackets) + # because the rules for how to indent those are non-trivial. 
+ if ( + not re.search(r'[",=><] *$', prev) + and (initial_spaces == 1 or initial_spaces == 3) + and not re.match(scope_or_label_pattern, cleansed_line) + and not (clean_lines.raw_lines[linenum] != line and re.match(r'^\s*""', line)) + ): + error( + filename, + linenum, + "whitespace/indent", + 3, + "Weird number of spaces at line-start. " "Are you using a 2-space indent?", + ) + + if line and line[-1].isspace(): + error( + filename, + linenum, + "whitespace/end_of_line", + 4, + "Line ends in whitespace. Consider deleting these extra spaces.", + ) + + # Check if the line is a header guard. + is_header_guard = False + if IsHeaderExtension(file_extension): + cppvar = GetHeaderGuardCPPVariable(filename) + if ( + line.startswith(f"#ifndef {cppvar}") + or line.startswith(f"#define {cppvar}") + or line.startswith(f"#endif // {cppvar}") + ): + is_header_guard = True + # #include lines and header guards can be long, since there's no clean way to + # split them. + # + # URLs can be long too. It's possible to split these, but it makes them + # harder to cut&paste. + # + # The "$Id:...$" comment may also get very long without it being the + # developers fault. + # + # Doxygen documentation copying can get pretty long when using an overloaded + # function declaration + if ( + not line.startswith("#include") + and not is_header_guard + and not re.match(r"^\s*//.*http(s?)://\S*$", line) + and not re.match(r"^\s*//\s*[^\s]*$", line) + and not re.match(r"^// \$Id:.*#[0-9]+ \$$", line) + and not re.match(r"^\s*/// [@\\](copydoc|copydetails|copybrief) .*$", line) + ): + line_width = GetLineWidth(line) + if line_width > _line_length: + error( + filename, + linenum, + "whitespace/line_length", + 2, + f"Lines should be <= {_line_length} characters long", + ) + + if ( + cleansed_line.count(";") > 1 + and + # allow simple single line lambdas + not re.match(r"^[^{};]*\[[^\[\]]*\][^{}]*\{[^{}\n\r]*\}", line) + and + # for loops are allowed two ;'s (and may run over two lines). 
+ cleansed_line.find("for") == -1 + and ( + GetPreviousNonBlankLine(clean_lines, linenum)[0].find("for") == -1 + or GetPreviousNonBlankLine(clean_lines, linenum)[0].find(";") != -1 + ) + and + # It's ok to have many commands in a switch case that fits in 1 line + not ( + (cleansed_line.find("case ") != -1 or cleansed_line.find("default:") != -1) + and cleansed_line.find("break;") != -1 + ) + ): + error( + filename, + linenum, + "whitespace/newline", + 0, + "More than one command on the same line", + ) + + # Some more style checks + CheckBraces(filename, clean_lines, linenum, error) + CheckTrailingSemicolon(filename, clean_lines, linenum, error) + CheckEmptyBlockBody(filename, clean_lines, linenum, error) + CheckSpacing(filename, clean_lines, linenum, nesting_state, error) + CheckOperatorSpacing(filename, clean_lines, linenum, error) + CheckParenthesisSpacing(filename, clean_lines, linenum, error) + CheckCommaSpacing(filename, clean_lines, linenum, error) + CheckBracesSpacing(filename, clean_lines, linenum, nesting_state, error) + CheckSpacingForFunctionCall(filename, clean_lines, linenum, error) + CheckCheck(filename, clean_lines, linenum, error) + CheckAltTokens(filename, clean_lines, linenum, error) + classinfo = nesting_state.InnermostClass() + if classinfo: + CheckSectionSpacing(filename, clean_lines, classinfo, linenum, error) _RE_PATTERN_INCLUDE = re.compile(r'^\s*#\s*include\s*([<"])([^>"]*)[>"].*$') @@ -5042,262 +5659,311 @@ def CheckStyle(filename, clean_lines, linenum, file_extension, nesting_state, # _RE_FIRST_COMPONENT.match('foo.cc').group(0) == 'foo' # _RE_FIRST_COMPONENT.match('foo-bar_baz.cc').group(0) == 'foo' # _RE_FIRST_COMPONENT.match('foo_bar-baz.cc').group(0) == 'foo' -_RE_FIRST_COMPONENT = re.compile(r'^[^-_.]+') +_RE_FIRST_COMPONENT = re.compile(r"^[^-_.]+") def _DropCommonSuffixes(filename): - """Drops common suffixes like _test.cc or -inl.h from filename. 
- - For example: - >>> _DropCommonSuffixes('foo/foo-inl.h') - 'foo/foo' - >>> _DropCommonSuffixes('foo/bar/foo.cc') - 'foo/bar/foo' - >>> _DropCommonSuffixes('foo/foo_internal.h') - 'foo/foo' - >>> _DropCommonSuffixes('foo/foo_unusualinternal.h') - 'foo/foo_unusualinternal' - - Args: - filename: The input filename. - - Returns: - The filename with the common suffix removed. - """ - for suffix in itertools.chain( - (f"{test_suffix.lstrip('_')}.{ext}" - for test_suffix, ext in itertools.product(_test_suffixes, GetNonHeaderExtensions())), - (f'{suffix}.{ext}' - for suffix, ext in itertools.product(['inl', 'imp', 'internal'], GetHeaderExtensions()))): - if (filename.endswith(suffix) and len(filename) > len(suffix) and - filename[-len(suffix) - 1] in ('-', '_')): - return filename[:-len(suffix) - 1] - return os.path.splitext(filename)[0] + """Drops common suffixes like _test.cc or -inl.h from filename. + + For example: + >>> _DropCommonSuffixes('foo/foo-inl.h') + 'foo/foo' + >>> _DropCommonSuffixes('foo/bar/foo.cc') + 'foo/bar/foo' + >>> _DropCommonSuffixes('foo/foo_internal.h') + 'foo/foo' + >>> _DropCommonSuffixes('foo/foo_unusualinternal.h') + 'foo/foo_unusualinternal' + + Args: + filename: The input filename. + + Returns: + The filename with the common suffix removed. + """ + for suffix in itertools.chain( + ( + f"{test_suffix.lstrip('_')}.{ext}" + for test_suffix, ext in itertools.product( + _test_suffixes, GetNonHeaderExtensions() + ) + ), + ( + f"{suffix}.{ext}" + for suffix, ext in itertools.product( + ["inl", "imp", "internal"], GetHeaderExtensions() + ) + ), + ): + if ( + filename.endswith(suffix) + and len(filename) > len(suffix) + and filename[-len(suffix) - 1] in ("-", "_") + ): + return filename[: -len(suffix) - 1] + return os.path.splitext(filename)[0] def _ClassifyInclude(fileinfo, include, used_angle_brackets, include_order="default"): - """Figures out what kind of header 'include' is. - - Args: - fileinfo: The current file cpplint is running over. 
A FileInfo instance. - include: The path to a #included file. - used_angle_brackets: True if the #include used <> rather than "". - include_order: "default" or other value allowed in program arguments - - Returns: - One of the _XXX_HEADER constants. - - For example: - >>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'stdio.h', True) - _C_SYS_HEADER - >>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'string', True) - _CPP_SYS_HEADER - >>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'foo/foo.h', True, "standardcfirst") - _OTHER_SYS_HEADER - >>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'foo/foo.h', False) - _LIKELY_MY_HEADER - >>> _ClassifyInclude(FileInfo('foo/foo_unknown_extension.cc'), - ... 'bar/foo_other_ext.h', False) - _POSSIBLE_MY_HEADER - >>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'foo/bar.h', False) - _OTHER_HEADER - """ - # This is a list of all standard c++ header files, except - # those already checked for above. - is_cpp_header = include in _CPP_HEADERS - - # Mark include as C header if in list or in a known folder for standard-ish C headers. - is_std_c_header = (include_order == "default") or (include in _C_HEADERS - # additional linux glibc header folders - or re.search(rf'(?:{"|".join(C_STANDARD_HEADER_FOLDERS)})\/.*\.h', include)) - - # Headers with C++ extensions shouldn't be considered C system headers - include_ext = os.path.splitext(include)[1] - is_system = used_angle_brackets and include_ext not in ['.hh', '.hpp', '.hxx', '.h++'] - - if is_system: - if is_cpp_header: - return _CPP_SYS_HEADER - if is_std_c_header: - return _C_SYS_HEADER - else: - return _OTHER_SYS_HEADER - - # If the target file and the include we're checking share a - # basename when we drop common extensions, and the include - # lives in . , then it's likely to be owned by the target file. 
- target_dir, target_base = ( - os.path.split(_DropCommonSuffixes(fileinfo.RepositoryName()))) - include_dir, include_base = os.path.split(_DropCommonSuffixes(include)) - target_dir_pub = os.path.normpath(target_dir + '/../public') - target_dir_pub = target_dir_pub.replace('\\', '/') - if target_base == include_base and ( - include_dir == target_dir or - include_dir == target_dir_pub): - return _LIKELY_MY_HEADER - - # If the target and include share some initial basename - # component, it's possible the target is implementing the - # include, so it's allowed to be first, but we'll never - # complain if it's not there. - target_first_component = _RE_FIRST_COMPONENT.match(target_base) - include_first_component = _RE_FIRST_COMPONENT.match(include_base) - if (target_first_component and include_first_component and - target_first_component.group(0) == - include_first_component.group(0)): - return _POSSIBLE_MY_HEADER - - return _OTHER_HEADER + """Figures out what kind of header 'include' is. + Args: + fileinfo: The current file cpplint is running over. A FileInfo instance. + include: The path to a #included file. + used_angle_brackets: True if the #include used <> rather than "". + include_order: "default" or other value allowed in program arguments + + Returns: + One of the _XXX_HEADER constants. + + For example: + >>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'stdio.h', True) + _C_SYS_HEADER + >>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'string', True) + _CPP_SYS_HEADER + >>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'foo/foo.h', True, "standardcfirst") + _OTHER_SYS_HEADER + >>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'foo/foo.h', False) + _LIKELY_MY_HEADER + >>> _ClassifyInclude(FileInfo('foo/foo_unknown_extension.cc'), + ... 'bar/foo_other_ext.h', False) + _POSSIBLE_MY_HEADER + >>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'foo/bar.h', False) + _OTHER_HEADER + """ + # This is a list of all standard c++ header files, except + # those already checked for above. 
+ is_cpp_header = include in _CPP_HEADERS + + # Mark include as C header if in list or in a known folder for standard-ish C headers. + is_std_c_header = (include_order == "default") or ( + include in _C_HEADERS + # additional linux glibc header folders + or re.search(rf'(?:{"|".join(C_STANDARD_HEADER_FOLDERS)})\/.*\.h', include) + ) + + # Headers with C++ extensions shouldn't be considered C system headers + include_ext = os.path.splitext(include)[1] + is_system = used_angle_brackets and include_ext not in [ + ".hh", + ".hpp", + ".hxx", + ".h++", + ] + + if is_system: + if is_cpp_header: + return _CPP_SYS_HEADER + if is_std_c_header: + return _C_SYS_HEADER + else: + return _OTHER_SYS_HEADER + + # If the target file and the include we're checking share a + # basename when we drop common extensions, and the include + # lives in . , then it's likely to be owned by the target file. + target_dir, target_base = os.path.split( + _DropCommonSuffixes(fileinfo.RepositoryName()) + ) + include_dir, include_base = os.path.split(_DropCommonSuffixes(include)) + target_dir_pub = os.path.normpath(target_dir + "/../public") + target_dir_pub = target_dir_pub.replace("\\", "/") + if target_base == include_base and ( + include_dir == target_dir or include_dir == target_dir_pub + ): + return _LIKELY_MY_HEADER + + # If the target and include share some initial basename + # component, it's possible the target is implementing the + # include, so it's allowed to be first, but we'll never + # complain if it's not there. + target_first_component = _RE_FIRST_COMPONENT.match(target_base) + include_first_component = _RE_FIRST_COMPONENT.match(include_base) + if ( + target_first_component + and include_first_component + and target_first_component.group(0) == include_first_component.group(0) + ): + return _POSSIBLE_MY_HEADER + + return _OTHER_HEADER def CheckIncludeLine(filename, clean_lines, linenum, include_state, error): - """Check rules that are applicable to #include lines. 
- - Strings on #include lines are NOT removed from elided line, to make - certain tasks easier. However, to prevent false positives, checks - applicable to #include lines in CheckLanguage must be put here. - - Args: - filename: The name of the current file. - clean_lines: A CleansedLines instance containing the file. - linenum: The number of the line to check. - include_state: An _IncludeState instance in which the headers are inserted. - error: The function to call with any errors found. - """ - fileinfo = FileInfo(filename) - line = clean_lines.lines[linenum] - - # "include" should use the new style "foo/bar.h" instead of just "bar.h" - # Only do this check if the included header follows google naming - # conventions. If not, assume that it's a 3rd party API that - # requires special include conventions. - # - # We also make an exception for Lua headers, which follow google - # naming convention but not the include convention. - # JMM: Disabling. We do not maintain this convention in partthenon - match = re.match(r'#include\s*"([^/]+\.(.*))"', line) - if False and match: - if (IsHeaderExtension(match.group(2)) and - not _THIRD_PARTY_HEADERS_PATTERN.match(match.group(1))): - error(filename, linenum, 'build/include_subdir', 4, - 'Include the directory when naming header files') - - # we shouldn't include a file more than once. actually, there are a - # handful of instances where doing so is okay, but in general it's - # not. - match = _RE_PATTERN_INCLUDE.search(line) - if match: - include = match.group(2) - used_angle_brackets = match.group(1) == '<' - duplicate_line = include_state.FindHeader(include) - if duplicate_line >= 0: - error(filename, linenum, 'build/include', 4, - f'"{include}" already included at {filename}:{duplicate_line}') - return - - for extension in GetNonHeaderExtensions(): - if (include.endswith('.' 
+ extension) and - os.path.dirname(fileinfo.RepositoryName()) != os.path.dirname(include)): - error(filename, linenum, 'build/include', 4, - 'Do not include .' + extension + ' files from other packages') - return + """Check rules that are applicable to #include lines. - # We DO want to include a 3rd party looking header if it matches the - # filename. Otherwise we get an erroneous error "...should include its - # header" error later. - third_src_header = False - for ext in GetHeaderExtensions(): - basefilename = filename[0:len(filename) - len(fileinfo.Extension())] - headerfile = basefilename + '.' + ext - headername = FileInfo(headerfile).RepositoryName() - if headername in include or include in headername: - third_src_header = True - break - - if third_src_header or not _THIRD_PARTY_HEADERS_PATTERN.match(include): - include_state.include_list[-1].append((include, linenum)) - - # We want to ensure that headers appear in the right order: - # 1) for foo.cc, foo.h (preferred location) - # 2) c system files - # 3) cpp system files - # 4) for foo.cc, foo.h (deprecated location) - # 5) other google headers - # - # We classify each include statement as one of those 5 types - # using a number of techniques. The include_state object keeps - # track of the highest type seen, and complains if we see a - # lower type after that. - error_message = include_state.CheckNextIncludeOrder( - _ClassifyInclude(fileinfo, include, used_angle_brackets, _include_order)) - if error_message: - error(filename, linenum, 'build/include_order', 4, - f'{error_message}. 
Should be: {fileinfo.BaseName()}.h, c system,' - ' c++ system, other.') - canonical_include = include_state.CanonicalizeAlphabeticalOrder(include) - if not include_state.IsInAlphabeticalOrder( - clean_lines, linenum, canonical_include): - error(filename, linenum, 'build/include_alpha', 4, - f'Include "{include}" not in alphabetical order') - include_state.SetLastHeader(canonical_include) + Strings on #include lines are NOT removed from elided line, to make + certain tasks easier. However, to prevent false positives, checks + applicable to #include lines in CheckLanguage must be put here. + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + include_state: An _IncludeState instance in which the headers are inserted. + error: The function to call with any errors found. + """ + fileinfo = FileInfo(filename) + line = clean_lines.lines[linenum] + + # "include" should use the new style "foo/bar.h" instead of just "bar.h" + # Only do this check if the included header follows google naming + # conventions. If not, assume that it's a 3rd party API that + # requires special include conventions. + # + # We also make an exception for Lua headers, which follow google + # naming convention but not the include convention. + # JMM: Disabling. We do not maintain this convention in partthenon + match = re.match(r'#include\s*"([^/]+\.(.*))"', line) + if False and match: + if IsHeaderExtension(match.group(2)) and not _THIRD_PARTY_HEADERS_PATTERN.match( + match.group(1) + ): + error( + filename, + linenum, + "build/include_subdir", + 4, + "Include the directory when naming header files", + ) + + # we shouldn't include a file more than once. actually, there are a + # handful of instances where doing so is okay, but in general it's + # not. 
+ match = _RE_PATTERN_INCLUDE.search(line) + if match: + include = match.group(2) + used_angle_brackets = match.group(1) == "<" + duplicate_line = include_state.FindHeader(include) + if duplicate_line >= 0: + error( + filename, + linenum, + "build/include", + 4, + f'"{include}" already included at {filename}:{duplicate_line}', + ) + return + + for extension in GetNonHeaderExtensions(): + if include.endswith("." + extension) and os.path.dirname( + fileinfo.RepositoryName() + ) != os.path.dirname(include): + error( + filename, + linenum, + "build/include", + 4, + "Do not include ." + extension + " files from other packages", + ) + return + + # We DO want to include a 3rd party looking header if it matches the + # filename. Otherwise we get an erroneous error "...should include its + # header" error later. + third_src_header = False + for ext in GetHeaderExtensions(): + basefilename = filename[0 : len(filename) - len(fileinfo.Extension())] + headerfile = basefilename + "." + ext + headername = FileInfo(headerfile).RepositoryName() + if headername in include or include in headername: + third_src_header = True + break + + if third_src_header or not _THIRD_PARTY_HEADERS_PATTERN.match(include): + include_state.include_list[-1].append((include, linenum)) + + # We want to ensure that headers appear in the right order: + # 1) for foo.cc, foo.h (preferred location) + # 2) c system files + # 3) cpp system files + # 4) for foo.cc, foo.h (deprecated location) + # 5) other google headers + # + # We classify each include statement as one of those 5 types + # using a number of techniques. The include_state object keeps + # track of the highest type seen, and complains if we see a + # lower type after that. + error_message = include_state.CheckNextIncludeOrder( + _ClassifyInclude(fileinfo, include, used_angle_brackets, _include_order) + ) + if error_message: + error( + filename, + linenum, + "build/include_order", + 4, + f"{error_message}. 
Should be: {fileinfo.BaseName()}.h, c system," + " c++ system, other.", + ) + canonical_include = include_state.CanonicalizeAlphabeticalOrder(include) + if not include_state.IsInAlphabeticalOrder( + clean_lines, linenum, canonical_include + ): + error( + filename, + linenum, + "build/include_alpha", + 4, + f'Include "{include}" not in alphabetical order', + ) + include_state.SetLastHeader(canonical_include) def _GetTextInside(text, start_pattern): - r"""Retrieves all the text between matching open and close parentheses. - - Given a string of lines and a regular expression string, retrieve all the text - following the expression and between opening punctuation symbols like - (, [, or {, and the matching close-punctuation symbol. This properly nested - occurrences of the punctuations, so for the text like - printf(a(), b(c())); - a call to _GetTextInside(text, r'printf\(') will return 'a(), b(c())'. - start_pattern must match string having an open punctuation symbol at the end. - - Args: - text: The lines to extract text. Its comments and strings must be elided. - It can be single line and can span multiple lines. - start_pattern: The regexp string indicating where to start extracting - the text. - Returns: - The extracted text. - None if either the opening string or ending punctuation could not be found. - """ - # TODO(unknown): Audit cpplint.py to see what places could be profitably - # rewritten to use _GetTextInside (and use inferior regexp matching today). - - # Give opening punctuations to get the matching close-punctuations. - matching_punctuation = {'(': ')', '{': '}', '[': ']'} - closing_punctuation = set(dict.values(matching_punctuation)) - - # Find the position to start extracting text. - match = re.search(start_pattern, text, re.M) - if not match: # start_pattern not found in text. 
- return None - start_position = match.end(0) - - assert start_position > 0, ( - 'start_pattern must ends with an opening punctuation.') - assert text[start_position - 1] in matching_punctuation, ( - 'start_pattern must ends with an opening punctuation.') - # Stack of closing punctuations we expect to have in text after position. - punctuation_stack = [matching_punctuation[text[start_position - 1]]] - position = start_position - while punctuation_stack and position < len(text): - if text[position] == punctuation_stack[-1]: - punctuation_stack.pop() - elif text[position] in closing_punctuation: - # A closing punctuation without matching opening punctuations. - return None - elif text[position] in matching_punctuation: - punctuation_stack.append(matching_punctuation[text[position]]) - position += 1 - if punctuation_stack: - # Opening punctuations left without matching close-punctuations. - return None - # punctuations match. - return text[start_position:position - 1] + r"""Retrieves all the text between matching open and close parentheses. + + Given a string of lines and a regular expression string, retrieve all the text + following the expression and between opening punctuation symbols like + (, [, or {, and the matching close-punctuation symbol. This properly nested + occurrences of the punctuations, so for the text like + printf(a(), b(c())); + a call to _GetTextInside(text, r'printf\(') will return 'a(), b(c())'. + start_pattern must match string having an open punctuation symbol at the end. + + Args: + text: The lines to extract text. Its comments and strings must be elided. + It can be single line and can span multiple lines. + start_pattern: The regexp string indicating where to start extracting + the text. + Returns: + The extracted text. + None if either the opening string or ending punctuation could not be found. 
+ """ + # TODO(unknown): Audit cpplint.py to see what places could be profitably + # rewritten to use _GetTextInside (and use inferior regexp matching today). + + # Give opening punctuations to get the matching close-punctuations. + matching_punctuation = {"(": ")", "{": "}", "[": "]"} + closing_punctuation = set(dict.values(matching_punctuation)) + + # Find the position to start extracting text. + match = re.search(start_pattern, text, re.M) + if not match: # start_pattern not found in text. + return None + start_position = match.end(0) + + assert start_position > 0, "start_pattern must ends with an opening punctuation." + assert ( + text[start_position - 1] in matching_punctuation + ), "start_pattern must ends with an opening punctuation." + # Stack of closing punctuations we expect to have in text after position. + punctuation_stack = [matching_punctuation[text[start_position - 1]]] + position = start_position + while punctuation_stack and position < len(text): + if text[position] == punctuation_stack[-1]: + punctuation_stack.pop() + elif text[position] in closing_punctuation: + # A closing punctuation without matching opening punctuations. + return None + elif text[position] in matching_punctuation: + punctuation_stack.append(matching_punctuation[text[position]]) + position += 1 + if punctuation_stack: + # Opening punctuations left without matching close-punctuations. + return None + # punctuations match. + return text[start_position : position - 1] # Patterns for matching call-by-reference parameters. @@ -5309,1669 +5975,2070 @@ def _GetTextInside(text, start_pattern): # > # | [^<>] )* # > -_RE_PATTERN_IDENT = r'[_a-zA-Z]\w*' # =~ [[:alpha:]][[:alnum:]]* +_RE_PATTERN_IDENT = r"[_a-zA-Z]\w*" # =~ [[:alpha:]][[:alnum:]]* _RE_PATTERN_TYPE = ( - r'(?:const\s+)?(?:typename\s+|class\s+|struct\s+|union\s+|enum\s+)?' - r'(?:\w|' - r'\s*<(?:<(?:<[^<>]*>|[^<>])*>|[^<>])*>|' - r'::)+') + r"(?:const\s+)?(?:typename\s+|class\s+|struct\s+|union\s+|enum\s+)?" 
+ r"(?:\w|" + r"\s*<(?:<(?:<[^<>]*>|[^<>])*>|[^<>])*>|" + r"::)+" +) # A call-by-reference parameter ends with '& identifier'. _RE_PATTERN_REF_PARAM = re.compile( - r'(' + _RE_PATTERN_TYPE + r'(?:\s*(?:\bconst\b|[*]))*\s*' - r'&\s*' + _RE_PATTERN_IDENT + r')\s*(?:=[^,()]+)?[,)]') + r"(" + _RE_PATTERN_TYPE + r"(?:\s*(?:\bconst\b|[*]))*\s*" + r"&\s*" + _RE_PATTERN_IDENT + r")\s*(?:=[^,()]+)?[,)]" +) # A call-by-const-reference parameter either ends with 'const& identifier' # or looks like 'const type& identifier' when 'type' is atomic. _RE_PATTERN_CONST_REF_PARAM = ( - r'(?:.*\s*\bconst\s*&\s*' + _RE_PATTERN_IDENT + - r'|const\s+' + _RE_PATTERN_TYPE + r'\s*&\s*' + _RE_PATTERN_IDENT + r')') + r"(?:.*\s*\bconst\s*&\s*" + + _RE_PATTERN_IDENT + + r"|const\s+" + + _RE_PATTERN_TYPE + + r"\s*&\s*" + + _RE_PATTERN_IDENT + + r")" +) # Stream types. -_RE_PATTERN_REF_STREAM_PARAM = ( - r'(?:.*stream\s*&\s*' + _RE_PATTERN_IDENT + r')') - - -def CheckLanguage(filename, clean_lines, linenum, file_extension, - include_state, nesting_state, error): - """Checks rules from the 'C++ language rules' section of cppguide.html. - - Some of these rules are hard to test (function overloading, using - uint32_t inappropriately), but we do the best we can. - - Args: - filename: The name of the current file. - clean_lines: A CleansedLines instance containing the file. - linenum: The number of the line to check. - file_extension: The extension (without the dot) of the filename. - include_state: An _IncludeState instance in which the headers are inserted. - nesting_state: A NestingState instance which maintains information about - the current stack of nested blocks being parsed. - error: The function to call with any errors found. - """ - # If the line is empty or consists of entirely a comment, no need to - # check it. 
- line = clean_lines.elided[linenum] - if not line: - return +_RE_PATTERN_REF_STREAM_PARAM = r"(?:.*stream\s*&\s*" + _RE_PATTERN_IDENT + r")" - match = _RE_PATTERN_INCLUDE.search(line) - if match: - CheckIncludeLine(filename, clean_lines, linenum, include_state, error) - return - # Reset include state across preprocessor directives. This is meant - # to silence warnings for conditional includes. - match = re.match(r'^\s*#\s*(if|ifdef|ifndef|elif|else|endif)\b', line) - if match: - include_state.ResetSection(match.group(1)) +def CheckLanguage( + filename, clean_lines, linenum, file_extension, include_state, nesting_state, error +): + """Checks rules from the 'C++ language rules' section of cppguide.html. + Some of these rules are hard to test (function overloading, using + uint32_t inappropriately), but we do the best we can. - # Perform other checks now that we are sure that this is not an include line - CheckCasts(filename, clean_lines, linenum, error) - CheckGlobalStatic(filename, clean_lines, linenum, error) - CheckPrintf(filename, clean_lines, linenum, error) + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + file_extension: The extension (without the dot) of the filename. + include_state: An _IncludeState instance in which the headers are inserted. + nesting_state: A NestingState instance which maintains information about + the current stack of nested blocks being parsed. + error: The function to call with any errors found. + """ + # If the line is empty or consists of entirely a comment, no need to + # check it. + line = clean_lines.elided[linenum] + if not line: + return - if IsHeaderExtension(file_extension): - # TODO(unknown): check that 1-arg constructors are explicit. - # How to tell it's a constructor? 
- # (handled in CheckForNonStandardConstructs for now) - # TODO(unknown): check that classes declare or disable copy/assign - # (level 1 error) - pass + match = _RE_PATTERN_INCLUDE.search(line) + if match: + CheckIncludeLine(filename, clean_lines, linenum, include_state, error) + return - # Check if people are using the verboten C basic types. The only exception - # we regularly allow is "unsigned short port" for port. - if re.search(r'\bshort port\b', line): - if not re.search(r'\bunsigned short port\b', line): - error(filename, linenum, 'runtime/int', 4, - 'Use "unsigned short" for ports, not "short"') - else: - match = re.search(r'\b(short|long(?! +double)|long long)\b', line) + # Reset include state across preprocessor directives. This is meant + # to silence warnings for conditional includes. + match = re.match(r"^\s*#\s*(if|ifdef|ifndef|elif|else|endif)\b", line) if match: - error(filename, linenum, 'runtime/int', 4, - f'Use int16_t/int64_t/etc, rather than the C type {match.group(1)}') - - # Check if some verboten operator overloading is going on - # TODO(unknown): catch out-of-line unary operator&: - # class X {}; - # int operator&(const X& x) { return 42; } // unary operator& - # The trick is it's hard to tell apart from binary operator&: - # class Y { int operator&(const Y& x) { return 23; } }; // binary operator& - if re.search(r'\boperator\s*&\s*\(\s*\)', line): - error(filename, linenum, 'runtime/operator', 4, - 'Unary operator& is dangerous. Do not use it.') - - # Check for suspicious usage of "if" like - # } if (a == b) { - if re.search(r'\}\s*if\s*\(', line): - error(filename, linenum, 'readability/braces', 4, - 'Did you mean "else if"? If not, start a new line for "if".') - - # Check for potential format string bugs like printf(foo). - # We constrain the pattern not to pick things like DocidForPrintf(foo). - # Not perfect but it can catch printf(foo.c_str()) and printf(foo->c_str()) - # TODO(unknown): Catch the following case. 
Need to change the calling - # convention of the whole function to process multiple line to handle it. - # printf( - # boy_this_is_a_really_long_variable_that_cannot_fit_on_the_prev_line); - printf_args = _GetTextInside(line, r'(?i)\b(string)?printf\s*\(') - if printf_args: - match = re.match(r'([\w.\->()]+)$', printf_args) - if match and match.group(1) != '__VA_ARGS__': - function_name = re.search(r'\b((?:string)?printf)\s*\(', - line, re.I).group(1) - error(filename, linenum, 'runtime/printf', 4, - 'Potential format string bug. Do' - f' {function_name}("%s", {match.group(1)}) instead.') - - # Check for potential memset bugs like memset(buf, sizeof(buf), 0). - match = re.search(r'memset\s*\(([^,]*),\s*([^,]*),\s*0\s*\)', line) - if match and not re.match(r"^''|-?[0-9]+|0x[0-9A-Fa-f]$", match.group(2)): - error(filename, linenum, 'runtime/memset', 4, - f'Did you mean "memset({match.group(1)}, 0, {match.group(2)})"?') - - if re.search(r'\busing namespace\b', line): - if re.search(r'\bliterals\b', line): - error(filename, linenum, 'build/namespaces_literals', 5, - 'Do not use namespace using-directives. ' - 'Use using-declarations instead.') + include_state.ResetSection(match.group(1)) + + # Perform other checks now that we are sure that this is not an include line + CheckCasts(filename, clean_lines, linenum, error) + CheckGlobalStatic(filename, clean_lines, linenum, error) + CheckPrintf(filename, clean_lines, linenum, error) + + if IsHeaderExtension(file_extension): + # TODO(unknown): check that 1-arg constructors are explicit. + # How to tell it's a constructor? + # (handled in CheckForNonStandardConstructs for now) + # TODO(unknown): check that classes declare or disable copy/assign + # (level 1 error) + pass + + # Check if people are using the verboten C basic types. The only exception + # we regularly allow is "unsigned short port" for port. 
+ if re.search(r"\bshort port\b", line): + if not re.search(r"\bunsigned short port\b", line): + error( + filename, + linenum, + "runtime/int", + 4, + 'Use "unsigned short" for ports, not "short"', + ) else: - error(filename, linenum, 'build/namespaces', 5, - 'Do not use namespace using-directives. ' - 'Use using-declarations instead.') - - # Detect variable-length arrays. - match = re.match(r'\s*(.+::)?(\w+) [a-z]\w*\[(.+)];', line) - if (match and match.group(2) != 'return' and match.group(2) != 'delete' and - match.group(3).find(']') == -1): - # Split the size using space and arithmetic operators as delimiters. - # If any of the resulting tokens are not compile time constants then - # report the error. - tokens = re.split(r'\s|\+|\-|\*|\/|<<|>>]', match.group(3)) - is_const = True - skip_next = False - for tok in tokens: - if skip_next: + match = re.search(r"\b(short|long(?! +double)|long long)\b", line) + if match: + error( + filename, + linenum, + "runtime/int", + 4, + f"Use int16_t/int64_t/etc, rather than the C type {match.group(1)}", + ) + + # Check if some verboten operator overloading is going on + # TODO(unknown): catch out-of-line unary operator&: + # class X {}; + # int operator&(const X& x) { return 42; } // unary operator& + # The trick is it's hard to tell apart from binary operator&: + # class Y { int operator&(const Y& x) { return 23; } }; // binary operator& + if re.search(r"\boperator\s*&\s*\(\s*\)", line): + error( + filename, + linenum, + "runtime/operator", + 4, + "Unary operator& is dangerous. Do not use it.", + ) + + # Check for suspicious usage of "if" like + # } if (a == b) { + if re.search(r"\}\s*if\s*\(", line): + error( + filename, + linenum, + "readability/braces", + 4, + 'Did you mean "else if"? If not, start a new line for "if".', + ) + + # Check for potential format string bugs like printf(foo). + # We constrain the pattern not to pick things like DocidForPrintf(foo). 
+ # Not perfect but it can catch printf(foo.c_str()) and printf(foo->c_str()) + # TODO(unknown): Catch the following case. Need to change the calling + # convention of the whole function to process multiple line to handle it. + # printf( + # boy_this_is_a_really_long_variable_that_cannot_fit_on_the_prev_line); + printf_args = _GetTextInside(line, r"(?i)\b(string)?printf\s*\(") + if printf_args: + match = re.match(r"([\w.\->()]+)$", printf_args) + if match and match.group(1) != "__VA_ARGS__": + function_name = re.search(r"\b((?:string)?printf)\s*\(", line, re.I).group( + 1 + ) + error( + filename, + linenum, + "runtime/printf", + 4, + "Potential format string bug. Do" + f' {function_name}("%s", {match.group(1)}) instead.', + ) + + # Check for potential memset bugs like memset(buf, sizeof(buf), 0). + match = re.search(r"memset\s*\(([^,]*),\s*([^,]*),\s*0\s*\)", line) + if match and not re.match(r"^''|-?[0-9]+|0x[0-9A-Fa-f]$", match.group(2)): + error( + filename, + linenum, + "runtime/memset", + 4, + f'Did you mean "memset({match.group(1)}, 0, {match.group(2)})"?', + ) + + if re.search(r"\busing namespace\b", line): + if re.search(r"\bliterals\b", line): + error( + filename, + linenum, + "build/namespaces_literals", + 5, + "Do not use namespace using-directives. " + "Use using-declarations instead.", + ) + else: + error( + filename, + linenum, + "build/namespaces", + 5, + "Do not use namespace using-directives. " + "Use using-declarations instead.", + ) + + # Detect variable-length arrays. + match = re.match(r"\s*(.+::)?(\w+) [a-z]\w*\[(.+)];", line) + if ( + match + and match.group(2) != "return" + and match.group(2) != "delete" + and match.group(3).find("]") == -1 + ): + # Split the size using space and arithmetic operators as delimiters. + # If any of the resulting tokens are not compile time constants then + # report the error. 
+ tokens = re.split(r"\s|\+|\-|\*|\/|<<|>>]", match.group(3)) + is_const = True skip_next = False - continue - - if re.search(r'sizeof\(.+\)', tok): continue - if re.search(r'arraysize\(\w+\)', tok): continue - - tok = tok.lstrip('(') - tok = tok.rstrip(')') - if not tok: continue - if re.match(r'\d+', tok): continue - if re.match(r'0[xX][0-9a-fA-F]+', tok): continue - if re.match(r'k[A-Z0-9]\w*', tok): continue - if re.match(r'(.+::)?k[A-Z0-9]\w*', tok): continue - if re.match(r'(.+::)?[A-Z][A-Z0-9_]*', tok): continue - # A catch all for tricky sizeof cases, including 'sizeof expression', - # 'sizeof(*type)', 'sizeof(const type)', 'sizeof(struct StructName)' - # requires skipping the next token because we split on ' ' and '*'. - if tok.startswith('sizeof'): - skip_next = True - continue - is_const = False - break - if not is_const: - error(filename, linenum, 'runtime/arrays', 1, - 'Do not use variable-length arrays. Use an appropriately named ' - "('k' followed by CamelCase) compile-time constant for the size.") - - # Check for use of unnamed namespaces in header files. Registration - # macros are typically OK, so we allow use of "namespace {" on lines - # that end with backslashes. - if (IsHeaderExtension(file_extension) - and re.search(r'\bnamespace\s*{', line) - and line[-1] != '\\'): - error(filename, linenum, 'build/namespaces_headers', 4, - 'Do not use unnamed namespaces in header files. 
See ' - 'https://google-styleguide.googlecode.com/svn/trunk/cppguide.xml#Namespaces' - ' for more information.') + for tok in tokens: + if skip_next: + skip_next = False + continue + + if re.search(r"sizeof\(.+\)", tok): + continue + if re.search(r"arraysize\(\w+\)", tok): + continue + + tok = tok.lstrip("(") + tok = tok.rstrip(")") + if not tok: + continue + if re.match(r"\d+", tok): + continue + if re.match(r"0[xX][0-9a-fA-F]+", tok): + continue + if re.match(r"k[A-Z0-9]\w*", tok): + continue + if re.match(r"(.+::)?k[A-Z0-9]\w*", tok): + continue + if re.match(r"(.+::)?[A-Z][A-Z0-9_]*", tok): + continue + # A catch all for tricky sizeof cases, including 'sizeof expression', + # 'sizeof(*type)', 'sizeof(const type)', 'sizeof(struct StructName)' + # requires skipping the next token because we split on ' ' and '*'. + if tok.startswith("sizeof"): + skip_next = True + continue + is_const = False + break + if not is_const: + error( + filename, + linenum, + "runtime/arrays", + 1, + "Do not use variable-length arrays. Use an appropriately named " + "('k' followed by CamelCase) compile-time constant for the size.", + ) + + # Check for use of unnamed namespaces in header files. Registration + # macros are typically OK, so we allow use of "namespace {" on lines + # that end with backslashes. + if ( + IsHeaderExtension(file_extension) + and re.search(r"\bnamespace\s*{", line) + and line[-1] != "\\" + ): + error( + filename, + linenum, + "build/namespaces_headers", + 4, + "Do not use unnamed namespaces in header files. See " + "https://google-styleguide.googlecode.com/svn/trunk/cppguide.xml#Namespaces" + " for more information.", + ) def CheckGlobalStatic(filename, clean_lines, linenum, error): - """Check for unsafe global or static objects. - - Args: - filename: The name of the current file. - clean_lines: A CleansedLines instance containing the file. - linenum: The number of the line to check. - error: The function to call with any errors found. 
- """ - line = clean_lines.elided[linenum] - - # Match two lines at a time to support multiline declarations - if linenum + 1 < clean_lines.NumLines() and not re.search(r'[;({]', line): - line += clean_lines.elided[linenum + 1].strip() - - # Check for people declaring static/global STL strings at the top level. - # This is dangerous because the C++ language does not guarantee that - # globals with constructors are initialized before the first access, and - # also because globals can be destroyed when some threads are still running. - # TODO(unknown): Generalize this to also find static unique_ptr instances. - # TODO(unknown): File bugs for clang-tidy to find these. - match = re.match( - r'((?:|static +)(?:|const +))(?::*std::)?string( +const)? +' - r'([a-zA-Z0-9_:]+)\b(.*)', - line) - - # Remove false positives: - # - String pointers (as opposed to values). - # string *pointer - # const string *pointer - # string const *pointer - # string *const pointer - # - # - Functions and template specializations. - # string Function(... - # string Class::Method(... - # - # - Operators. These are matched separately because operator names - # cross non-word boundaries, and trying to match both operators - # and functions at the same time would decrease accuracy of - # matching identifiers. - # string Class::operator*() - if (match and - not re.search(r'\bstring\b(\s+const)?\s*[\*\&]\s*(const\s+)?\w', line) and - not re.search(r'\boperator\W', line) and - not re.match(r'\s*(<.*>)?(::[a-zA-Z0-9_]+)*\s*\(([^"]|$)', match.group(4))): - if re.search(r'\bconst\b', line): - error(filename, linenum, 'runtime/string', 4, - 'For a static/global string constant, use a C style string instead:' - f' "{match.group(1)}char{match.group(2) or ""} {match.group(3)}[]".') - else: - error(filename, linenum, 'runtime/string', 4, - 'Static/global string variables are not permitted.') + """Check for unsafe global or static objects. 
- if (re.search(r'\b([A-Za-z0-9_]*_)\(\1\)', line) or - re.search(r'\b([A-Za-z0-9_]*_)\(CHECK_NOTNULL\(\1\)\)', line)): - error(filename, linenum, 'runtime/init', 4, - 'You seem to be initializing a member variable with itself.') + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + error: The function to call with any errors found. + """ + line = clean_lines.elided[linenum] + + # Match two lines at a time to support multiline declarations + if linenum + 1 < clean_lines.NumLines() and not re.search(r"[;({]", line): + line += clean_lines.elided[linenum + 1].strip() + + # Check for people declaring static/global STL strings at the top level. + # This is dangerous because the C++ language does not guarantee that + # globals with constructors are initialized before the first access, and + # also because globals can be destroyed when some threads are still running. + # TODO(unknown): Generalize this to also find static unique_ptr instances. + # TODO(unknown): File bugs for clang-tidy to find these. + match = re.match( + r"((?:|static +)(?:|const +))(?::*std::)?string( +const)? +" + r"([a-zA-Z0-9_:]+)\b(.*)", + line, + ) + + # Remove false positives: + # - String pointers (as opposed to values). + # string *pointer + # const string *pointer + # string const *pointer + # string *const pointer + # + # - Functions and template specializations. + # string Function(... + # string Class::Method(... + # + # - Operators. These are matched separately because operator names + # cross non-word boundaries, and trying to match both operators + # and functions at the same time would decrease accuracy of + # matching identifiers. 
+ # string Class::operator*() + if ( + match + and not re.search(r"\bstring\b(\s+const)?\s*[\*\&]\s*(const\s+)?\w", line) + and not re.search(r"\boperator\W", line) + and not re.match(r'\s*(<.*>)?(::[a-zA-Z0-9_]+)*\s*\(([^"]|$)', match.group(4)) + ): + if re.search(r"\bconst\b", line): + error( + filename, + linenum, + "runtime/string", + 4, + "For a static/global string constant, use a C style string instead:" + f' "{match.group(1)}char{match.group(2) or ""} {match.group(3)}[]".', + ) + else: + error( + filename, + linenum, + "runtime/string", + 4, + "Static/global string variables are not permitted.", + ) + + if re.search(r"\b([A-Za-z0-9_]*_)\(\1\)", line) or re.search( + r"\b([A-Za-z0-9_]*_)\(CHECK_NOTNULL\(\1\)\)", line + ): + error( + filename, + linenum, + "runtime/init", + 4, + "You seem to be initializing a member variable with itself.", + ) def CheckPrintf(filename, clean_lines, linenum, error): - """Check for printf related issues. - - Args: - filename: The name of the current file. - clean_lines: A CleansedLines instance containing the file. - linenum: The number of the line to check. - error: The function to call with any errors found. - """ - line = clean_lines.elided[linenum] - - # When snprintf is used, the second argument shouldn't be a literal. - match = re.search(r'snprintf\s*\(([^,]*),\s*([0-9]*)\s*,', line) - if match and match.group(2) != '0': - # If 2nd arg is zero, snprintf is used to calculate size. - error(filename, linenum, 'runtime/printf', 3, 'If you can, use' - f' sizeof({match.group(1)}) instead of {match.group(2)}' - ' as the 2nd arg to snprintf.') - - # Check if some verboten C functions are being used. - if re.search(r'\bsprintf\s*\(', line): - error(filename, linenum, 'runtime/printf', 5, - 'Never use sprintf. 
Use snprintf instead.') - match = re.search(r'\b(strcpy|strcat)\s*\(', line) - if match: - error(filename, linenum, 'runtime/printf', 4, - f'Almost always, snprintf is better than {match.group(1)}') + """Check for printf related issues. + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + error: The function to call with any errors found. + """ + line = clean_lines.elided[linenum] -def IsDerivedFunction(clean_lines, linenum): - """Check if current line contains an inherited function. - - Args: - clean_lines: A CleansedLines instance containing the file. - linenum: The number of the line to check. - Returns: - True if current line contains a function with "override" - virt-specifier. - """ - # Scan back a few lines for start of current function - for i in range(linenum, max(-1, linenum - 10), -1): - match = re.match(r'^([^()]*\w+)\(', clean_lines.elided[i]) + # When snprintf is used, the second argument shouldn't be a literal. + match = re.search(r"snprintf\s*\(([^,]*),\s*([0-9]*)\s*,", line) + if match and match.group(2) != "0": + # If 2nd arg is zero, snprintf is used to calculate size. + error( + filename, + linenum, + "runtime/printf", + 3, + "If you can, use" + f" sizeof({match.group(1)}) instead of {match.group(2)}" + " as the 2nd arg to snprintf.", + ) + + # Check if some verboten C functions are being used. + if re.search(r"\bsprintf\s*\(", line): + error( + filename, + linenum, + "runtime/printf", + 5, + "Never use sprintf. 
Use snprintf instead.", + ) + match = re.search(r"\b(strcpy|strcat)\s*\(", line) if match: - # Look for "override" after the matching closing parenthesis - line, _, closing_paren = CloseExpression( - clean_lines, i, len(match.group(1))) - return (closing_paren >= 0 and - re.search(r'\boverride\b', line[closing_paren:])) - return False + error( + filename, + linenum, + "runtime/printf", + 4, + f"Almost always, snprintf is better than {match.group(1)}", + ) + + +def IsDerivedFunction(clean_lines, linenum): + """Check if current line contains an inherited function. + + Args: + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + Returns: + True if current line contains a function with "override" + virt-specifier. + """ + # Scan back a few lines for start of current function + for i in range(linenum, max(-1, linenum - 10), -1): + match = re.match(r"^([^()]*\w+)\(", clean_lines.elided[i]) + if match: + # Look for "override" after the matching closing parenthesis + line, _, closing_paren = CloseExpression( + clean_lines, i, len(match.group(1)) + ) + return closing_paren >= 0 and re.search( + r"\boverride\b", line[closing_paren:] + ) + return False def IsOutOfLineMethodDefinition(clean_lines, linenum): - """Check if current line contains an out-of-line method definition. + """Check if current line contains an out-of-line method definition. - Args: - clean_lines: A CleansedLines instance containing the file. - linenum: The number of the line to check. - Returns: - True if current line contains an out-of-line method definition. - """ - # Scan back a few lines for start of current function - for i in range(linenum, max(-1, linenum - 10), -1): - if re.match(r'^([^()]*\w+)\(', clean_lines.elided[i]): - return re.match(r'^[^()]*\w+::\w+\(', clean_lines.elided[i]) is not None - return False + Args: + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. 
+ Returns: + True if current line contains an out-of-line method definition. + """ + # Scan back a few lines for start of current function + for i in range(linenum, max(-1, linenum - 10), -1): + if re.match(r"^([^()]*\w+)\(", clean_lines.elided[i]): + return re.match(r"^[^()]*\w+::\w+\(", clean_lines.elided[i]) is not None + return False def IsInitializerList(clean_lines, linenum): - """Check if current line is inside constructor initializer list. - - Args: - clean_lines: A CleansedLines instance containing the file. - linenum: The number of the line to check. - Returns: - True if current line appears to be inside constructor initializer - list, False otherwise. - """ - for i in range(linenum, 1, -1): - line = clean_lines.elided[i] - if i == linenum: - remove_function_body = re.match(r'^(.*)\{\s*$', line) - if remove_function_body: - line = remove_function_body.group(1) - - if re.search(r'\s:\s*\w+[({]', line): - # A lone colon tend to indicate the start of a constructor - # initializer list. It could also be a ternary operator, which - # also tend to appear in constructor initializer lists as - # opposed to parameter lists. - return True - if re.search(r'\}\s*,\s*$', line): - # A closing brace followed by a comma is probably the end of a - # brace-initialized member in constructor initializer list. - return True - if re.search(r'[{};]\s*$', line): - # Found one of the following: - # - A closing brace or semicolon, probably the end of the previous - # function. - # - An opening brace, probably the start of current class or namespace. - # - # Current line is probably not inside an initializer list since - # we saw one of those things without seeing the starting colon. - return False - - # Got to the beginning of the file without seeing the start of - # constructor initializer list. - return False - - -def CheckForNonConstReference(filename, clean_lines, linenum, - nesting_state, error): - """Check for non-const references. 
- - Separate from CheckLanguage since it scans backwards from current - line, instead of scanning forward. - - Args: - filename: The name of the current file. - clean_lines: A CleansedLines instance containing the file. - linenum: The number of the line to check. - nesting_state: A NestingState instance which maintains information about - the current stack of nested blocks being parsed. - error: The function to call with any errors found. - """ - # Do nothing if there is no '&' on current line. - line = clean_lines.elided[linenum] - if '&' not in line: - return + """Check if current line is inside constructor initializer list. - # If a function is inherited, current function doesn't have much of - # a choice, so any non-const references should not be blamed on - # derived function. - if IsDerivedFunction(clean_lines, linenum): - return + Args: + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + Returns: + True if current line appears to be inside constructor initializer + list, False otherwise. + """ + for i in range(linenum, 1, -1): + line = clean_lines.elided[i] + if i == linenum: + remove_function_body = re.match(r"^(.*)\{\s*$", line) + if remove_function_body: + line = remove_function_body.group(1) + + if re.search(r"\s:\s*\w+[({]", line): + # A lone colon tend to indicate the start of a constructor + # initializer list. It could also be a ternary operator, which + # also tend to appear in constructor initializer lists as + # opposed to parameter lists. + return True + if re.search(r"\}\s*,\s*$", line): + # A closing brace followed by a comma is probably the end of a + # brace-initialized member in constructor initializer list. + return True + if re.search(r"[{};]\s*$", line): + # Found one of the following: + # - A closing brace or semicolon, probably the end of the previous + # function. + # - An opening brace, probably the start of current class or namespace. 
+ # + # Current line is probably not inside an initializer list since + # we saw one of those things without seeing the starting colon. + return False + + # Got to the beginning of the file without seeing the start of + # constructor initializer list. + return False - # Don't warn on out-of-line method definitions, as we would warn on the - # in-line declaration, if it isn't marked with 'override'. - if IsOutOfLineMethodDefinition(clean_lines, linenum): - return - # Long type names may be broken across multiple lines, usually in one - # of these forms: - # LongType - # ::LongTypeContinued &identifier - # LongType:: - # LongTypeContinued &identifier - # LongType< - # ...>::LongTypeContinued &identifier - # - # If we detected a type split across two lines, join the previous - # line to current line so that we can match const references - # accordingly. - # - # Note that this only scans back one line, since scanning back - # arbitrary number of lines would be expensive. If you have a type - # that spans more than 2 lines, please use a typedef. - if linenum > 1: - previous = None - if re.match(r'\s*::(?:[\w<>]|::)+\s*&\s*\S', line): - # previous_line\n + ::current_line - previous = re.search(r'\b((?:const\s*)?(?:[\w<>]|::)+[\w<>])\s*$', - clean_lines.elided[linenum - 1]) - elif re.match(r'\s*[a-zA-Z_]([\w<>]|::)+\s*&\s*\S', line): - # previous_line::\n + current_line - previous = re.search(r'\b((?:const\s*)?(?:[\w<>]|::)+::)\s*$', - clean_lines.elided[linenum - 1]) - if previous: - line = previous.group(1) + line.lstrip() - else: - # Check for templated parameter that is split across multiple lines - endpos = line.rfind('>') - if endpos > -1: - (_, startline, startpos) = ReverseCloseExpression( - clean_lines, linenum, endpos) - if startpos > -1 and startline < linenum: - # Found the matching < on an earlier line, collect all - # pieces up to current line. 
- line = '' - for i in range(startline, linenum + 1): - line += clean_lines.elided[i].strip() - - # Check for non-const references in function parameters. A single '&' may - # found in the following places: - # inside expression: binary & for bitwise AND - # inside expression: unary & for taking the address of something - # inside declarators: reference parameter - # We will exclude the first two cases by checking that we are not inside a - # function body, including one that was just introduced by a trailing '{'. - # TODO(unknown): Doesn't account for 'catch(Exception& e)' [rare]. - if (nesting_state.previous_stack_top and - not (isinstance(nesting_state.previous_stack_top, _ClassInfo) or - isinstance(nesting_state.previous_stack_top, _NamespaceInfo))): - # Not at toplevel, not within a class, and not within a namespace - return +def CheckForNonConstReference(filename, clean_lines, linenum, nesting_state, error): + """Check for non-const references. - # Avoid initializer lists. We only need to scan back from the - # current line for something that starts with ':'. - # - # We don't need to check the current line, since the '&' would - # appear inside the second set of parentheses on the current line as - # opposed to the first set. - if linenum > 0: - for i in range(linenum - 1, max(0, linenum - 10), -1): - previous_line = clean_lines.elided[i] - if not re.search(r'[),]\s*$', previous_line): - break - if re.match(r'^\s*:\s+\S', previous_line): + Separate from CheckLanguage since it scans backwards from current + line, instead of scanning forward. + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + nesting_state: A NestingState instance which maintains information about + the current stack of nested blocks being parsed. + error: The function to call with any errors found. + """ + # Do nothing if there is no '&' on current line. 
+ line = clean_lines.elided[linenum] + if "&" not in line: return - # Avoid preprocessors - if re.search(r'\\\s*$', line): - return + # If a function is inherited, current function doesn't have much of + # a choice, so any non-const references should not be blamed on + # derived function. + if IsDerivedFunction(clean_lines, linenum): + return - # Avoid constructor initializer lists - if IsInitializerList(clean_lines, linenum): - return + # Don't warn on out-of-line method definitions, as we would warn on the + # in-line declaration, if it isn't marked with 'override'. + if IsOutOfLineMethodDefinition(clean_lines, linenum): + return - # We allow non-const references in a few standard places, like functions - # called "swap()" or iostream operators like "<<" or ">>". Do not check - # those function parameters. - # - # We also accept & in static_assert, which looks like a function but - # it's actually a declaration expression. - allowed_functions = (r'(?:[sS]wap(?:<\w:+>)?|' - r'operator\s*[<>][<>]|' - r'static_assert|COMPILE_ASSERT' - r')\s*\(') - if re.search(allowed_functions, line): - return - elif not re.search(r'\S+\([^)]*$', line): - # Don't see an allowed function on this line. Actually we - # didn't see any function name on this line, so this is likely a - # multi-line parameter list. Try a bit harder to catch this case. - for i in range(2): - if (linenum > i and - re.search(allowed_functions, clean_lines.elided[linenum - i - 1])): + # Long type names may be broken across multiple lines, usually in one + # of these forms: + # LongType + # ::LongTypeContinued &identifier + # LongType:: + # LongTypeContinued &identifier + # LongType< + # ...>::LongTypeContinued &identifier + # + # If we detected a type split across two lines, join the previous + # line to current line so that we can match const references + # accordingly. + # + # Note that this only scans back one line, since scanning back + # arbitrary number of lines would be expensive. 
If you have a type + # that spans more than 2 lines, please use a typedef. + if linenum > 1: + previous = None + if re.match(r"\s*::(?:[\w<>]|::)+\s*&\s*\S", line): + # previous_line\n + ::current_line + previous = re.search( + r"\b((?:const\s*)?(?:[\w<>]|::)+[\w<>])\s*$", + clean_lines.elided[linenum - 1], + ) + elif re.match(r"\s*[a-zA-Z_]([\w<>]|::)+\s*&\s*\S", line): + # previous_line::\n + current_line + previous = re.search( + r"\b((?:const\s*)?(?:[\w<>]|::)+::)\s*$", + clean_lines.elided[linenum - 1], + ) + if previous: + line = previous.group(1) + line.lstrip() + else: + # Check for templated parameter that is split across multiple lines + endpos = line.rfind(">") + if endpos > -1: + (_, startline, startpos) = ReverseCloseExpression( + clean_lines, linenum, endpos + ) + if startpos > -1 and startline < linenum: + # Found the matching < on an earlier line, collect all + # pieces up to current line. + line = "" + for i in range(startline, linenum + 1): + line += clean_lines.elided[i].strip() + + # Check for non-const references in function parameters. A single '&' may + # found in the following places: + # inside expression: binary & for bitwise AND + # inside expression: unary & for taking the address of something + # inside declarators: reference parameter + # We will exclude the first two cases by checking that we are not inside a + # function body, including one that was just introduced by a trailing '{'. + # TODO(unknown): Doesn't account for 'catch(Exception& e)' [rare]. + if nesting_state.previous_stack_top and not ( + isinstance(nesting_state.previous_stack_top, _ClassInfo) + or isinstance(nesting_state.previous_stack_top, _NamespaceInfo) + ): + # Not at toplevel, not within a class, and not within a namespace + return + + # Avoid initializer lists. We only need to scan back from the + # current line for something that starts with ':'. 
+ # + # We don't need to check the current line, since the '&' would + # appear inside the second set of parentheses on the current line as + # opposed to the first set. + if linenum > 0: + for i in range(linenum - 1, max(0, linenum - 10), -1): + previous_line = clean_lines.elided[i] + if not re.search(r"[),]\s*$", previous_line): + break + if re.match(r"^\s*:\s+\S", previous_line): + return + + # Avoid preprocessors + if re.search(r"\\\s*$", line): + return + + # Avoid constructor initializer lists + if IsInitializerList(clean_lines, linenum): return - decls = re.sub(r'{[^}]*}', ' ', line) # exclude function body - for parameter in re.findall(_RE_PATTERN_REF_PARAM, decls): - if (not re.match(_RE_PATTERN_CONST_REF_PARAM, parameter) and - not re.match(_RE_PATTERN_REF_STREAM_PARAM, parameter)): - error(filename, linenum, 'runtime/references', 2, - 'Is this a non-const reference? ' - 'If so, make const or use a pointer: ' + - re.sub(' *<', '<', parameter)) + # We allow non-const references in a few standard places, like functions + # called "swap()" or iostream operators like "<<" or ">>". Do not check + # those function parameters. + # + # We also accept & in static_assert, which looks like a function but + # it's actually a declaration expression. + allowed_functions = ( + r"(?:[sS]wap(?:<\w:+>)?|" + r"operator\s*[<>][<>]|" + r"static_assert|COMPILE_ASSERT" + r")\s*\(" + ) + if re.search(allowed_functions, line): + return + elif not re.search(r"\S+\([^)]*$", line): + # Don't see an allowed function on this line. Actually we + # didn't see any function name on this line, so this is likely a + # multi-line parameter list. Try a bit harder to catch this case. 
+ for i in range(2): + if linenum > i and re.search( + allowed_functions, clean_lines.elided[linenum - i - 1] + ): + return + + decls = re.sub(r"{[^}]*}", " ", line) # exclude function body + for parameter in re.findall(_RE_PATTERN_REF_PARAM, decls): + if not re.match(_RE_PATTERN_CONST_REF_PARAM, parameter) and not re.match( + _RE_PATTERN_REF_STREAM_PARAM, parameter + ): + error( + filename, + linenum, + "runtime/references", + 2, + "Is this a non-const reference? " + "If so, make const or use a pointer: " + re.sub(" *<", "<", parameter), + ) + + +def CheckCasts(filename, clean_lines, linenum, error): + """Various cast related checks. + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + error: The function to call with any errors found. + """ + line = clean_lines.elided[linenum] + # Check to see if they're using an conversion function cast. + # I just try to capture the most common basic types, though there are more. + # Parameterless conversion functions, such as bool(), are allowed as they are + # probably a member operator declaration or default constructor. + match = re.search( + r"(\bnew\s+(?:const\s+)?|\S<\s*(?:const\s+)?)?\b" + r"(int|float|double|bool|char|int16_t|uint16_t|int32_t|uint32_t|int64_t|uint64_t)" + r"(\([^)].*)", + line, + ) + expecting_function = ExpectingFunctionArgs(clean_lines, linenum) + if match and not expecting_function: + matched_type = match.group(2) -def CheckCasts(filename, clean_lines, linenum, error): - """Various cast related checks. - - Args: - filename: The name of the current file. - clean_lines: A CleansedLines instance containing the file. - linenum: The number of the line to check. - error: The function to call with any errors found. - """ - line = clean_lines.elided[linenum] - - # Check to see if they're using an conversion function cast. - # I just try to capture the most common basic types, though there are more. 
- # Parameterless conversion functions, such as bool(), are allowed as they are - # probably a member operator declaration or default constructor. - match = re.search( - r'(\bnew\s+(?:const\s+)?|\S<\s*(?:const\s+)?)?\b' - r'(int|float|double|bool|char|int16_t|uint16_t|int32_t|uint32_t|int64_t|uint64_t)' - r'(\([^)].*)', line) - expecting_function = ExpectingFunctionArgs(clean_lines, linenum) - if match and not expecting_function: - matched_type = match.group(2) - - # matched_new_or_template is used to silence two false positives: - # - New operators - # - Template arguments with function types + # matched_new_or_template is used to silence two false positives: + # - New operators + # - Template arguments with function types + # + # For template arguments, we match on types immediately following + # an opening bracket without any spaces. This is a fast way to + # silence the common case where the function type is the first + # template argument. False negative with less-than comparison is + # avoided because those operators are usually followed by a space. + # + # function // bracket + no space = false positive + # value < double(42) // bracket + space = true positive + matched_new_or_template = match.group(1) + + # Avoid arrays by looking for brackets that come after the closing + # parenthesis. + if re.match(r"\([^()]+\)\s*\[", match.group(3)): + return + + # Other things to ignore: + # - Function pointers + # - Casts to pointer types + # - Placement new + # - Alias declarations + matched_funcptr = match.group(3) + if ( + matched_new_or_template is None + and not ( + matched_funcptr + and ( + re.match(r"\((?:[^() ]+::\s*\*\s*)?[^() ]+\)\s*\(", matched_funcptr) + or matched_funcptr.startswith("(*)") + ) + ) + and not re.match(r"\s*using\s+\S+\s*=\s*" + matched_type, line) + and not re.search(r"new\(\S+\)\s*" + matched_type, line) + ): + error( + filename, + linenum, + "readability/casting", + 4, + "Using deprecated casting style. 
" + f"Use static_cast<{matched_type}>(...) instead", + ) + + if not expecting_function: + CheckCStyleCast( + filename, + clean_lines, + linenum, + "static_cast", + r"\((int|float|double|bool|char|u?int(16|32|64)_t|size_t)\)", + error, + ) + + # This doesn't catch all cases. Consider (const char * const)"hello". # - # For template arguments, we match on types immediately following - # an opening bracket without any spaces. This is a fast way to - # silence the common case where the function type is the first - # template argument. False negative with less-than comparison is - # avoided because those operators are usually followed by a space. + # (char *) "foo" should always be a const_cast (reinterpret_cast won't + # compile). + if CheckCStyleCast( + filename, clean_lines, linenum, "const_cast", r'\((char\s?\*+\s?)\)\s*"', error + ): + pass + else: + # Check pointer casts for other than string constants + CheckCStyleCast( + filename, + clean_lines, + linenum, + "reinterpret_cast", + r"\((\w+\s?\*+\s?)\)", + error, + ) + + # In addition, we look for people taking the address of a cast. This + # is dangerous -- casts can assign to temporaries, so the pointer doesn't + # point where you think. # - # function // bracket + no space = false positive - # value < double(42) // bracket + space = true positive - matched_new_or_template = match.group(1) - - # Avoid arrays by looking for brackets that come after the closing - # parenthesis. 
- if re.match(r'\([^()]+\)\s*\[', match.group(3)): - return - - # Other things to ignore: - # - Function pointers - # - Casts to pointer types - # - Placement new - # - Alias declarations - matched_funcptr = match.group(3) - if (matched_new_or_template is None and - not (matched_funcptr and - (re.match(r'\((?:[^() ]+::\s*\*\s*)?[^() ]+\)\s*\(', - matched_funcptr) or - matched_funcptr.startswith('(*)'))) and - not re.match(r'\s*using\s+\S+\s*=\s*' + matched_type, line) and - not re.search(r'new\(\S+\)\s*' + matched_type, line)): - error(filename, linenum, 'readability/casting', 4, - 'Using deprecated casting style. ' - f'Use static_cast<{matched_type}>(...) instead') - - if not expecting_function: - CheckCStyleCast(filename, clean_lines, linenum, 'static_cast', - r'\((int|float|double|bool|char|u?int(16|32|64)_t|size_t)\)', error) - - # This doesn't catch all cases. Consider (const char * const)"hello". - # - # (char *) "foo" should always be a const_cast (reinterpret_cast won't - # compile). - if CheckCStyleCast(filename, clean_lines, linenum, 'const_cast', - r'\((char\s?\*+\s?)\)\s*"', error): - pass - else: - # Check pointer casts for other than string constants - CheckCStyleCast(filename, clean_lines, linenum, 'reinterpret_cast', - r'\((\w+\s?\*+\s?)\)', error) - - # In addition, we look for people taking the address of a cast. This - # is dangerous -- casts can assign to temporaries, so the pointer doesn't - # point where you think. - # - # Some non-identifier character is required before the '&' for the - # expression to be recognized as a cast. 
These are casts: - # expression = &static_cast(temporary()); - # function(&(int*)(temporary())); - # - # This is not a cast: - # reference_type&(int* function_param); - match = re.search( - r'(?:[^\w]&\(([^)*][^)]*)\)[\w(])|' - r'(?:[^\w]&(static|dynamic|down|reinterpret)_cast\b)', line) - if match: - # Try a better error message when the & is bound to something - # dereferenced by the casted pointer, as opposed to the casted - # pointer itself. - parenthesis_error = False - match = re.match(r'^(.*&(?:static|dynamic|down|reinterpret)_cast\b)<', line) + # Some non-identifier character is required before the '&' for the + # expression to be recognized as a cast. These are casts: + # expression = &static_cast(temporary()); + # function(&(int*)(temporary())); + # + # This is not a cast: + # reference_type&(int* function_param); + match = re.search( + r"(?:[^\w]&\(([^)*][^)]*)\)[\w(])|" + r"(?:[^\w]&(static|dynamic|down|reinterpret)_cast\b)", + line, + ) if match: - _, y1, x1 = CloseExpression(clean_lines, linenum, len(match.group(1))) - if x1 >= 0 and clean_lines.elided[y1][x1] == '(': - _, y2, x2 = CloseExpression(clean_lines, y1, x1) - if x2 >= 0: - extended_line = clean_lines.elided[y2][x2:] - if y2 < clean_lines.NumLines() - 1: - extended_line += clean_lines.elided[y2 + 1] - if re.match(r'\s*(?:->|\[)', extended_line): - parenthesis_error = True - - if parenthesis_error: - error(filename, linenum, 'readability/casting', 4, - ('Are you taking an address of something dereferenced ' - 'from a cast? Wrapping the dereferenced expression in ' - 'parentheses will make the binding more obvious')) - else: - error(filename, linenum, 'runtime/casting', 4, - ('Are you taking an address of a cast? ' - 'This is dangerous: could be a temp var. ' - 'Take the address before doing the cast, rather than after')) + # Try a better error message when the & is bound to something + # dereferenced by the casted pointer, as opposed to the casted + # pointer itself. 
+ parenthesis_error = False + match = re.match(r"^(.*&(?:static|dynamic|down|reinterpret)_cast\b)<", line) + if match: + _, y1, x1 = CloseExpression(clean_lines, linenum, len(match.group(1))) + if x1 >= 0 and clean_lines.elided[y1][x1] == "(": + _, y2, x2 = CloseExpression(clean_lines, y1, x1) + if x2 >= 0: + extended_line = clean_lines.elided[y2][x2:] + if y2 < clean_lines.NumLines() - 1: + extended_line += clean_lines.elided[y2 + 1] + if re.match(r"\s*(?:->|\[)", extended_line): + parenthesis_error = True + + if parenthesis_error: + error( + filename, + linenum, + "readability/casting", + 4, + ( + "Are you taking an address of something dereferenced " + "from a cast? Wrapping the dereferenced expression in " + "parentheses will make the binding more obvious" + ), + ) + else: + error( + filename, + linenum, + "runtime/casting", + 4, + ( + "Are you taking an address of a cast? " + "This is dangerous: could be a temp var. " + "Take the address before doing the cast, rather than after" + ), + ) def CheckCStyleCast(filename, clean_lines, linenum, cast_type, pattern, error): - """Checks for a C-style cast by looking for the pattern. - - Args: - filename: The name of the current file. - clean_lines: A CleansedLines instance containing the file. - linenum: The number of the line to check. - cast_type: The string for the C++ cast to recommend. This is either - reinterpret_cast, static_cast, or const_cast, depending. - pattern: The regular expression used to find C-style casts. - error: The function to call with any errors found. - - Returns: - True if an error was emitted. - False otherwise. - """ - line = clean_lines.elided[linenum] - match = re.search(pattern, line) - if not match: - return False + """Checks for a C-style cast by looking for the pattern. 
- # Exclude lines with keywords that tend to look like casts - context = line[0:match.start(1) - 1] - if re.match(r'.*\b(?:sizeof|alignof|alignas|[_A-Z][_A-Z0-9]*)\s*$', context): - return False + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + cast_type: The string for the C++ cast to recommend. This is either + reinterpret_cast, static_cast, or const_cast, depending. + pattern: The regular expression used to find C-style casts. + error: The function to call with any errors found. - # Try expanding current context to see if we one level of - # parentheses inside a macro. - if linenum > 0: - for i in range(linenum - 1, max(0, linenum - 5), -1): - context = clean_lines.elided[i] + context - if re.match(r'.*\b[_A-Z][_A-Z0-9]*\s*\((?:\([^()]*\)|[^()])*$', context): - return False + Returns: + True if an error was emitted. + False otherwise. + """ + line = clean_lines.elided[linenum] + match = re.search(pattern, line) + if not match: + return False - # operator++(int) and operator--(int) - if (context.endswith(' operator++') or context.endswith(' operator--') or - context.endswith('::operator++') or context.endswith('::operator--')): - return False + # Exclude lines with keywords that tend to look like casts + context = line[0 : match.start(1) - 1] + if re.match(r".*\b(?:sizeof|alignof|alignas|[_A-Z][_A-Z0-9]*)\s*$", context): + return False - # A single unnamed argument for a function tends to look like old style cast. - # If we see those, don't issue warnings for deprecated casts. - remainder = line[match.end(0):] - if re.match(r'^\s*(?:;|const\b|throw\b|final\b|override\b|[=>{),]|->)', - remainder): - return False + # Try expanding current context to see if we one level of + # parentheses inside a macro. 
+ if linenum > 0: + for i in range(linenum - 1, max(0, linenum - 5), -1): + context = clean_lines.elided[i] + context + if re.match(r".*\b[_A-Z][_A-Z0-9]*\s*\((?:\([^()]*\)|[^()])*$", context): + return False + + # operator++(int) and operator--(int) + if ( + context.endswith(" operator++") + or context.endswith(" operator--") + or context.endswith("::operator++") + or context.endswith("::operator--") + ): + return False + + # A single unnamed argument for a function tends to look like old style cast. + # If we see those, don't issue warnings for deprecated casts. + remainder = line[match.end(0) :] + if re.match(r"^\s*(?:;|const\b|throw\b|final\b|override\b|[=>{),]|->)", remainder): + return False - # At this point, all that should be left is actual casts. - error(filename, linenum, 'readability/casting', 4, - f'Using C-style cast. Use {cast_type}<{match.group(1)}>(...) instead') + # At this point, all that should be left is actual casts. + error( + filename, + linenum, + "readability/casting", + 4, + f"Using C-style cast. Use {cast_type}<{match.group(1)}>(...) instead", + ) - return True + return True def ExpectingFunctionArgs(clean_lines, linenum): - """Checks whether where function type arguments are expected. - - Args: - clean_lines: A CleansedLines instance containing the file. - linenum: The number of the line to check. - - Returns: - True if the line at 'linenum' is inside something that expects arguments - of function types. - """ - line = clean_lines.elided[linenum] - return (re.match(r'^\s*MOCK_(CONST_)?METHOD\d+(_T)?\(', line) or - (linenum >= 2 and - (re.match(r'^\s*MOCK_(?:CONST_)?METHOD\d+(?:_T)?\((?:\S+,)?\s*$', - clean_lines.elided[linenum - 1]) or - re.match(r'^\s*MOCK_(?:CONST_)?METHOD\d+(?:_T)?\(\s*$', - clean_lines.elided[linenum - 2]) or - re.search(r'\bstd::m?function\s*\<\s*$', - clean_lines.elided[linenum - 1])))) + """Checks whether where function type arguments are expected. 
+ Args: + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + + Returns: + True if the line at 'linenum' is inside something that expects arguments + of function types. + """ + line = clean_lines.elided[linenum] + return re.match(r"^\s*MOCK_(CONST_)?METHOD\d+(_T)?\(", line) or ( + linenum >= 2 + and ( + re.match( + r"^\s*MOCK_(?:CONST_)?METHOD\d+(?:_T)?\((?:\S+,)?\s*$", + clean_lines.elided[linenum - 1], + ) + or re.match( + r"^\s*MOCK_(?:CONST_)?METHOD\d+(?:_T)?\(\s*$", + clean_lines.elided[linenum - 2], + ) + or re.search(r"\bstd::m?function\s*\<\s*$", clean_lines.elided[linenum - 1]) + ) + ) -_HEADERS_CONTAINING_TEMPLATES = ( - ('', ('deque',)), - ('', ('unary_function', 'binary_function', - 'plus', 'minus', 'multiplies', 'divides', 'modulus', - 'negate', - 'equal_to', 'not_equal_to', 'greater', 'less', - 'greater_equal', 'less_equal', - 'logical_and', 'logical_or', 'logical_not', - 'unary_negate', 'not1', 'binary_negate', 'not2', - 'bind1st', 'bind2nd', - 'pointer_to_unary_function', - 'pointer_to_binary_function', - 'ptr_fun', - 'mem_fun_t', 'mem_fun', 'mem_fun1_t', 'mem_fun1_ref_t', - 'mem_fun_ref_t', - 'const_mem_fun_t', 'const_mem_fun1_t', - 'const_mem_fun_ref_t', 'const_mem_fun1_ref_t', - 'mem_fun_ref', - )), - ('', ('numeric_limits',)), - ('', ('list',)), - ('', ('multimap',)), - ('', ('allocator', 'make_shared', 'make_unique', 'shared_ptr', - 'unique_ptr', 'weak_ptr')), - ('', ('queue', 'priority_queue',)), - ('', ('set', 'multiset',)), - ('', ('stack',)), - ('', ('char_traits', 'basic_string',)), - ('', ('tuple',)), - ('', ('unordered_map', 'unordered_multimap')), - ('', ('unordered_set', 'unordered_multiset')), - ('', ('pair',)), - ('', ('vector',)), +_HEADERS_CONTAINING_TEMPLATES = ( + ("", ("deque",)), + ( + "", + ( + "unary_function", + "binary_function", + "plus", + "minus", + "multiplies", + "divides", + "modulus", + "negate", + "equal_to", + "not_equal_to", + "greater", + "less", + 
"greater_equal", + "less_equal", + "logical_and", + "logical_or", + "logical_not", + "unary_negate", + "not1", + "binary_negate", + "not2", + "bind1st", + "bind2nd", + "pointer_to_unary_function", + "pointer_to_binary_function", + "ptr_fun", + "mem_fun_t", + "mem_fun", + "mem_fun1_t", + "mem_fun1_ref_t", + "mem_fun_ref_t", + "const_mem_fun_t", + "const_mem_fun1_t", + "const_mem_fun_ref_t", + "const_mem_fun1_ref_t", + "mem_fun_ref", + ), + ), + ("", ("numeric_limits",)), + ("", ("list",)), + ("", ("multimap",)), + ( + "", + ( + "allocator", + "make_shared", + "make_unique", + "shared_ptr", + "unique_ptr", + "weak_ptr", + ), + ), + ( + "", + ( + "queue", + "priority_queue", + ), + ), + ( + "", + ( + "set", + "multiset", + ), + ), + ("", ("stack",)), + ( + "", + ( + "char_traits", + "basic_string", + ), + ), + ("", ("tuple",)), + ("", ("unordered_map", "unordered_multimap")), + ("", ("unordered_set", "unordered_multiset")), + ("", ("pair",)), + ("", ("vector",)), # gcc extensions. # Note: std::hash is their hash, ::hash is our hash - ('', ('hash_map', 'hash_multimap',)), - ('', ('hash_set', 'hash_multiset',)), - ('', ('slist',)), - ) + ( + "", + ( + "hash_map", + "hash_multimap", + ), + ), + ( + "", + ( + "hash_set", + "hash_multiset", + ), + ), + ("", ("slist",)), +) _HEADERS_MAYBE_TEMPLATES = ( - ('', ('copy', 'max', 'min', 'min_element', 'sort', - 'transform', - )), - ('', ('forward', 'make_pair', 'move', 'swap')), - ) + ( + "", + ( + "copy", + "max", + "min", + "min_element", + "sort", + "transform", + ), + ), + ("", ("forward", "make_pair", "move", "swap")), +) # Non templated types or global objects _HEADERS_TYPES_OR_OBJS = ( # String and others are special -- it is a non-templatized type in STL. 
- ('', ('string',)), - ('', ('cin', 'cout', 'cerr', 'clog', 'wcin', 'wcout', - 'wcerr', 'wclog')), - ('', ('FILE', 'fpos_t'))) + ("", ("string",)), + ("", ("cin", "cout", "cerr", "clog", "wcin", "wcout", "wcerr", "wclog")), + ("", ("FILE", "fpos_t")), +) # Non templated functions _HEADERS_FUNCTIONS = ( - ('', ('fopen', 'freopen', - 'fclose', 'fflush', 'setbuf', 'setvbuf', 'fread', - 'fwrite', 'fgetc', 'getc', 'fgets', 'fputc', 'putc', - 'fputs', 'getchar', 'gets', 'putchar', 'puts', 'ungetc', - 'scanf', 'fscanf', 'sscanf', 'vscanf', 'vfscanf', - 'vsscanf', 'printf', 'fprintf', 'sprintf', 'snprintf', - 'vprintf', 'vfprintf', 'vsprintf', 'vsnprintf', - 'ftell', 'fgetpos', 'fseek', 'fsetpos', - 'clearerr', 'feof', 'ferror', 'perror', - 'tmpfile', 'tmpnam'),),) + ( + "", + ( + "fopen", + "freopen", + "fclose", + "fflush", + "setbuf", + "setvbuf", + "fread", + "fwrite", + "fgetc", + "getc", + "fgets", + "fputc", + "putc", + "fputs", + "getchar", + "gets", + "putchar", + "puts", + "ungetc", + "scanf", + "fscanf", + "sscanf", + "vscanf", + "vfscanf", + "vsscanf", + "printf", + "fprintf", + "sprintf", + "snprintf", + "vprintf", + "vfprintf", + "vsprintf", + "vsnprintf", + "ftell", + "fgetpos", + "fseek", + "fsetpos", + "clearerr", + "feof", + "ferror", + "perror", + "tmpfile", + "tmpnam", + ), + ), +) _re_pattern_headers_maybe_templates = [] for _header, _templates in _HEADERS_MAYBE_TEMPLATES: - for _template in _templates: - # Match max(..., ...), max(..., ...), but not foo->max, foo.max or - # 'type::max()'. - _re_pattern_headers_maybe_templates.append( - (re.compile(r'((\bstd::)|[^>.:])\b' + _template + r'(<.*?>)?\([^\)]'), - _template, - _header)) + for _template in _templates: + # Match max(..., ...), max(..., ...), but not foo->max, foo.max or + # 'type::max()'. + _re_pattern_headers_maybe_templates.append( + ( + re.compile(r"((\bstd::)|[^>.:])\b" + _template + r"(<.*?>)?\([^\)]"), + _template, + _header, + ) + ) # Map is often overloaded. 
Only check, if it is fully qualified. # Match 'std::map(...)', but not 'map(...)'' _re_pattern_headers_maybe_templates.append( - (re.compile(r'(std\b::\bmap\s*\<)|(^(std\b::\b)map\b\(\s*\<)'), - 'map<>', - '')) + (re.compile(r"(std\b::\bmap\s*\<)|(^(std\b::\b)map\b\(\s*\<)"), "map<>", "") +) # Other scripts may reach in and modify this pattern. _re_pattern_templates = [] for _header, _templates in _HEADERS_CONTAINING_TEMPLATES: - for _template in _templates: - _re_pattern_templates.append( - (re.compile(r'((^|(^|\s|((^|\W)::))std::)|[^>.:]\b)' + _template + r'\s*\<'), - _template + '<>', - _header)) + for _template in _templates: + _re_pattern_templates.append( + ( + re.compile( + r"((^|(^|\s|((^|\W)::))std::)|[^>.:]\b)" + _template + r"\s*\<" + ), + _template + "<>", + _header, + ) + ) _re_pattern_types_or_objs = [] for _header, _types_or_objs in _HEADERS_TYPES_OR_OBJS: - for _type_or_obj in _types_or_objs: - _re_pattern_types_or_objs.append( - (re.compile(r'\b' + _type_or_obj + r'\b'), - _type_or_obj, - _header)) + for _type_or_obj in _types_or_objs: + _re_pattern_types_or_objs.append( + (re.compile(r"\b" + _type_or_obj + r"\b"), _type_or_obj, _header) + ) _re_pattern_functions = [] for _header, _functions in _HEADERS_FUNCTIONS: - for _function in _functions: - # Match printf(..., ...), but not foo->printf, foo.printf or - # 'type::printf()'. - _re_pattern_functions.append( - (re.compile(r'([^>.]|^)\b' + _function + r'\([^\)]'), - _function, - _header)) + for _function in _functions: + # Match printf(..., ...), but not foo->printf, foo.printf or + # 'type::printf()'. + _re_pattern_functions.append( + (re.compile(r"([^>.]|^)\b" + _function + r"\([^\)]"), _function, _header) + ) + def FilesBelongToSameModule(filename_cc, filename_h): - """Check if these two filenames belong to the same module. 
- - The concept of a 'module' here is a as follows: - foo.h, foo-inl.h, foo.cc, foo_test.cc and foo_unittest.cc belong to the - same 'module' if they are in the same directory. - some/path/public/xyzzy and some/path/internal/xyzzy are also considered - to belong to the same module here. - - If the filename_cc contains a longer path than the filename_h, for example, - '/absolute/path/to/base/sysinfo.cc', and this file would include - 'base/sysinfo.h', this function also produces the prefix needed to open the - header. This is used by the caller of this function to more robustly open the - header file. We don't have access to the real include paths in this context, - so we need this guesswork here. - - Known bugs: tools/base/bar.cc and base/bar.h belong to the same module - according to this implementation. Because of this, this function gives - some false positives. This should be sufficiently rare in practice. - - Args: - filename_cc: is the path for the source (e.g. .cc) file - filename_h: is the path for the header path - - Returns: - Tuple with a bool and a string: - bool: True if filename_cc and filename_h belong to the same module. - string: the additional prefix needed to open the header file. 
- """ - fileinfo_cc = FileInfo(filename_cc) - if fileinfo_cc.Extension().lstrip('.') not in GetNonHeaderExtensions(): - return (False, '') - - fileinfo_h = FileInfo(filename_h) - if not IsHeaderExtension(fileinfo_h.Extension().lstrip('.')): - return (False, '') - - filename_cc = filename_cc[:-(len(fileinfo_cc.Extension()))] - matched_test_suffix = re.search(_TEST_FILE_SUFFIX, fileinfo_cc.BaseName()) - if matched_test_suffix: - filename_cc = filename_cc[:-len(matched_test_suffix.group(1))] - - filename_cc = filename_cc.replace('/public/', '/') - filename_cc = filename_cc.replace('/internal/', '/') - - filename_h = filename_h[:-(len(fileinfo_h.Extension()))] - if filename_h.endswith('-inl'): - filename_h = filename_h[:-len('-inl')] - filename_h = filename_h.replace('/public/', '/') - filename_h = filename_h.replace('/internal/', '/') - - files_belong_to_same_module = filename_cc.endswith(filename_h) - common_path = '' - if files_belong_to_same_module: - common_path = filename_cc[:-len(filename_h)] - return files_belong_to_same_module, common_path - - -def CheckForIncludeWhatYouUse(filename, clean_lines, include_state, error, - io=codecs): - """Reports for missing stl includes. - - This function will output warnings to make sure you are including the headers - necessary for the stl containers and functions that you use. We only give one - reason to include a header. For example, if you use both equal_to<> and - less<> in a .h file, only one (the latter in the file) of these will be - reported as a reason to include the . - - Args: - filename: The name of the current file. - clean_lines: A CleansedLines instance containing the file. - include_state: An _IncludeState instance. - error: The function to call with any errors found. - io: The IO factory to use to read the header file. Provided for unittest - injection. - """ - required = {} # A map of header name to linenumber and the template entity. 
- # Example of required: { '': (1219, 'less<>') } - - for linenum in range(clean_lines.NumLines()): - line = clean_lines.elided[linenum] - if not line or line[0] == '#': - continue - - _re_patterns = [] - _re_patterns.extend(_re_pattern_types_or_objs) - _re_patterns.extend(_re_pattern_functions) - for pattern, item, header in _re_patterns: - matched = pattern.search(line) - if matched: - # Don't warn about strings in non-STL namespaces: - # (We check only the first match per line; good enough.) - prefix = line[:matched.start()] - if prefix.endswith('std::') or not prefix.endswith('::'): - required[header] = (linenum, item) - - for pattern, template, header in _re_pattern_headers_maybe_templates: - if pattern.search(line): - required[header] = (linenum, template) - - # The following function is just a speed up, no semantics are changed. - if '<' not in line: # Reduces the cpu time usage by skipping lines. - continue - - for pattern, template, header in _re_pattern_templates: - matched = pattern.search(line) - if matched: - # Don't warn about IWYU in non-STL namespaces: - # (We check only the first match per line; good enough.) - prefix = line[:matched.start()] - if prefix.endswith('std::') or not prefix.endswith('::'): - required[header] = (linenum, template) - - # Let's flatten the include_state include_list and copy it into a dictionary. - include_dict = dict([item for sublist in include_state.include_list - for item in sublist]) - - # All the lines have been processed, report the errors found. 
- for required_header_unstripped in sorted(required, key=required.__getitem__): - template = required[required_header_unstripped][1] - if required_header_unstripped.strip('<>"') not in include_dict: - error(filename, required[required_header_unstripped][0], - 'build/include_what_you_use', 4, - 'Add #include ' + required_header_unstripped + ' for ' + template) - - -_RE_PATTERN_EXPLICIT_MAKEPAIR = re.compile(r'\bmake_pair\s*<') + """Check if these two filenames belong to the same module. + + The concept of a 'module' here is a as follows: + foo.h, foo-inl.h, foo.cc, foo_test.cc and foo_unittest.cc belong to the + same 'module' if they are in the same directory. + some/path/public/xyzzy and some/path/internal/xyzzy are also considered + to belong to the same module here. + + If the filename_cc contains a longer path than the filename_h, for example, + '/absolute/path/to/base/sysinfo.cc', and this file would include + 'base/sysinfo.h', this function also produces the prefix needed to open the + header. This is used by the caller of this function to more robustly open the + header file. We don't have access to the real include paths in this context, + so we need this guesswork here. + + Known bugs: tools/base/bar.cc and base/bar.h belong to the same module + according to this implementation. Because of this, this function gives + some false positives. This should be sufficiently rare in practice. + + Args: + filename_cc: is the path for the source (e.g. .cc) file + filename_h: is the path for the header path + + Returns: + Tuple with a bool and a string: + bool: True if filename_cc and filename_h belong to the same module. + string: the additional prefix needed to open the header file. 
+ """ + fileinfo_cc = FileInfo(filename_cc) + if fileinfo_cc.Extension().lstrip(".") not in GetNonHeaderExtensions(): + return (False, "") + + fileinfo_h = FileInfo(filename_h) + if not IsHeaderExtension(fileinfo_h.Extension().lstrip(".")): + return (False, "") + + filename_cc = filename_cc[: -(len(fileinfo_cc.Extension()))] + matched_test_suffix = re.search(_TEST_FILE_SUFFIX, fileinfo_cc.BaseName()) + if matched_test_suffix: + filename_cc = filename_cc[: -len(matched_test_suffix.group(1))] + + filename_cc = filename_cc.replace("/public/", "/") + filename_cc = filename_cc.replace("/internal/", "/") + + filename_h = filename_h[: -(len(fileinfo_h.Extension()))] + if filename_h.endswith("-inl"): + filename_h = filename_h[: -len("-inl")] + filename_h = filename_h.replace("/public/", "/") + filename_h = filename_h.replace("/internal/", "/") + + files_belong_to_same_module = filename_cc.endswith(filename_h) + common_path = "" + if files_belong_to_same_module: + common_path = filename_cc[: -len(filename_h)] + return files_belong_to_same_module, common_path + + +def CheckForIncludeWhatYouUse(filename, clean_lines, include_state, error, io=codecs): + """Reports for missing stl includes. + + This function will output warnings to make sure you are including the headers + necessary for the stl containers and functions that you use. We only give one + reason to include a header. For example, if you use both equal_to<> and + less<> in a .h file, only one (the latter in the file) of these will be + reported as a reason to include the . + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + include_state: An _IncludeState instance. + error: The function to call with any errors found. + io: The IO factory to use to read the header file. Provided for unittest + injection. + """ + required = {} # A map of header name to linenumber and the template entity. 
+ # Example of required: { '': (1219, 'less<>') } + + for linenum in range(clean_lines.NumLines()): + line = clean_lines.elided[linenum] + if not line or line[0] == "#": + continue + + _re_patterns = [] + _re_patterns.extend(_re_pattern_types_or_objs) + _re_patterns.extend(_re_pattern_functions) + for pattern, item, header in _re_patterns: + matched = pattern.search(line) + if matched: + # Don't warn about strings in non-STL namespaces: + # (We check only the first match per line; good enough.) + prefix = line[: matched.start()] + if prefix.endswith("std::") or not prefix.endswith("::"): + required[header] = (linenum, item) + + for pattern, template, header in _re_pattern_headers_maybe_templates: + if pattern.search(line): + required[header] = (linenum, template) + + # The following function is just a speed up, no semantics are changed. + if "<" not in line: # Reduces the cpu time usage by skipping lines. + continue + + for pattern, template, header in _re_pattern_templates: + matched = pattern.search(line) + if matched: + # Don't warn about IWYU in non-STL namespaces: + # (We check only the first match per line; good enough.) + prefix = line[: matched.start()] + if prefix.endswith("std::") or not prefix.endswith("::"): + required[header] = (linenum, template) + + # Let's flatten the include_state include_list and copy it into a dictionary. + include_dict = dict( + [item for sublist in include_state.include_list for item in sublist] + ) + + # All the lines have been processed, report the errors found. 
+ for required_header_unstripped in sorted(required, key=required.__getitem__): + template = required[required_header_unstripped][1] + if required_header_unstripped.strip('<>"') not in include_dict: + error( + filename, + required[required_header_unstripped][0], + "build/include_what_you_use", + 4, + "Add #include " + required_header_unstripped + " for " + template, + ) + + +_RE_PATTERN_EXPLICIT_MAKEPAIR = re.compile(r"\bmake_pair\s*<") def CheckMakePairUsesDeduction(filename, clean_lines, linenum, error): - """Check that make_pair's template arguments are deduced. - - G++ 4.6 in C++11 mode fails badly if make_pair's template arguments are - specified explicitly, and such use isn't intended in any case. - - Args: - filename: The name of the current file. - clean_lines: A CleansedLines instance containing the file. - linenum: The number of the line to check. - error: The function to call with any errors found. - """ - line = clean_lines.elided[linenum] - match = _RE_PATTERN_EXPLICIT_MAKEPAIR.search(line) - if match: - error(filename, linenum, 'build/explicit_make_pair', - 4, # 4 = high confidence - 'For C++11-compatibility, omit template arguments from make_pair' - ' OR use pair directly OR if appropriate, construct a pair directly') + """Check that make_pair's template arguments are deduced. + + G++ 4.6 in C++11 mode fails badly if make_pair's template arguments are + specified explicitly, and such use isn't intended in any case. + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + error: The function to call with any errors found. 
+ """ + line = clean_lines.elided[linenum] + match = _RE_PATTERN_EXPLICIT_MAKEPAIR.search(line) + if match: + error( + filename, + linenum, + "build/explicit_make_pair", + 4, # 4 = high confidence + "For C++11-compatibility, omit template arguments from make_pair" + " OR use pair directly OR if appropriate, construct a pair directly", + ) def CheckRedundantVirtual(filename, clean_lines, linenum, error): - """Check if line contains a redundant "virtual" function-specifier. - - Args: - filename: The name of the current file. - clean_lines: A CleansedLines instance containing the file. - linenum: The number of the line to check. - error: The function to call with any errors found. - """ - # Look for "virtual" on current line. - line = clean_lines.elided[linenum] - virtual = re.match(r'^(.*)(\bvirtual\b)(.*)$', line) - if not virtual: return - - # Ignore "virtual" keywords that are near access-specifiers. These - # are only used in class base-specifier and do not apply to member - # functions. - if (re.search(r'\b(public|protected|private)\s+$', virtual.group(1)) or - re.match(r'^\s+(public|protected|private)\b', virtual.group(3))): - return + """Check if line contains a redundant "virtual" function-specifier. - # Ignore the "virtual" keyword from virtual base classes. Usually - # there is a column on the same line in these cases (virtual base - # classes are rare in google3 because multiple inheritance is rare). - if re.match(r'^.*[^:]:[^:].*$', line): return - - # Look for the next opening parenthesis. This is the start of the - # parameter list (possibly on the next line shortly after virtual). - # TODO(unknown): doesn't work if there are virtual functions with - # decltype() or other things that use parentheses, but csearch suggests - # that this is rare. 
- end_col = -1 - end_line = -1 - start_col = len(virtual.group(2)) - for start_line in range(linenum, min(linenum + 3, clean_lines.NumLines())): - line = clean_lines.elided[start_line][start_col:] - parameter_list = re.match(r'^([^(]*)\(', line) - if parameter_list: - # Match parentheses to find the end of the parameter list - (_, end_line, end_col) = CloseExpression( - clean_lines, start_line, start_col + len(parameter_list.group(1))) - break - start_col = 0 - - if end_col < 0: - return # Couldn't find end of parameter list, give up - - # Look for "override" or "final" after the parameter list - # (possibly on the next few lines). - for i in range(end_line, min(end_line + 3, clean_lines.NumLines())): - line = clean_lines.elided[i][end_col:] - match = re.search(r'\b(override|final)\b', line) - if match: - error(filename, linenum, 'readability/inheritance', 4, - ('"virtual" is redundant since function is ' - f'already declared as "{match.group(1)}"')) + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + error: The function to call with any errors found. + """ + # Look for "virtual" on current line. + line = clean_lines.elided[linenum] + virtual = re.match(r"^(.*)(\bvirtual\b)(.*)$", line) + if not virtual: + return - # Set end_col to check whole lines after we are done with the - # first line. - end_col = 0 - if re.search(r'[^\w]\s*$', line): - break + # Ignore "virtual" keywords that are near access-specifiers. These + # are only used in class base-specifier and do not apply to member + # functions. + if re.search(r"\b(public|protected|private)\s+$", virtual.group(1)) or re.match( + r"^\s+(public|protected|private)\b", virtual.group(3) + ): + return + # Ignore the "virtual" keyword from virtual base classes. Usually + # there is a column on the same line in these cases (virtual base + # classes are rare in google3 because multiple inheritance is rare). 
+ if re.match(r"^.*[^:]:[^:].*$", line): + return -def CheckRedundantOverrideOrFinal(filename, clean_lines, linenum, error): - """Check if line contains a redundant "override" or "final" virt-specifier. - - Args: - filename: The name of the current file. - clean_lines: A CleansedLines instance containing the file. - linenum: The number of the line to check. - error: The function to call with any errors found. - """ - # Look for closing parenthesis nearby. We need one to confirm where - # the declarator ends and where the virt-specifier starts to avoid - # false positives. - line = clean_lines.elided[linenum] - declarator_end = line.rfind(')') - if declarator_end >= 0: - fragment = line[declarator_end:] - else: - if linenum > 1 and clean_lines.elided[linenum - 1].rfind(')') >= 0: - fragment = line - else: - return + # Look for the next opening parenthesis. This is the start of the + # parameter list (possibly on the next line shortly after virtual). + # TODO(unknown): doesn't work if there are virtual functions with + # decltype() or other things that use parentheses, but csearch suggests + # that this is rare. + end_col = -1 + end_line = -1 + start_col = len(virtual.group(2)) + for start_line in range(linenum, min(linenum + 3, clean_lines.NumLines())): + line = clean_lines.elided[start_line][start_col:] + parameter_list = re.match(r"^([^(]*)\(", line) + if parameter_list: + # Match parentheses to find the end of the parameter list + (_, end_line, end_col) = CloseExpression( + clean_lines, start_line, start_col + len(parameter_list.group(1)) + ) + break + start_col = 0 + + if end_col < 0: + return # Couldn't find end of parameter list, give up + + # Look for "override" or "final" after the parameter list + # (possibly on the next few lines). 
+ for i in range(end_line, min(end_line + 3, clean_lines.NumLines())): + line = clean_lines.elided[i][end_col:] + match = re.search(r"\b(override|final)\b", line) + if match: + error( + filename, + linenum, + "readability/inheritance", + 4, + ( + '"virtual" is redundant since function is ' + f'already declared as "{match.group(1)}"' + ), + ) + + # Set end_col to check whole lines after we are done with the + # first line. + end_col = 0 + if re.search(r"[^\w]\s*$", line): + break - # Check that at most one of "override" or "final" is present, not both - if re.search(r'\boverride\b', fragment) and re.search(r'\bfinal\b', fragment): - error(filename, linenum, 'readability/inheritance', 4, - ('"override" is redundant since function is ' - 'already declared as "final"')) +def CheckRedundantOverrideOrFinal(filename, clean_lines, linenum, error): + """Check if line contains a redundant "override" or "final" virt-specifier. + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + error: The function to call with any errors found. + """ + # Look for closing parenthesis nearby. We need one to confirm where + # the declarator ends and where the virt-specifier starts to avoid + # false positives. + line = clean_lines.elided[linenum] + declarator_end = line.rfind(")") + if declarator_end >= 0: + fragment = line[declarator_end:] + else: + if linenum > 1 and clean_lines.elided[linenum - 1].rfind(")") >= 0: + fragment = line + else: + return + + # Check that at most one of "override" or "final" is present, not both + if re.search(r"\boverride\b", fragment) and re.search(r"\bfinal\b", fragment): + error( + filename, + linenum, + "readability/inheritance", + 4, + ( + '"override" is redundant since function is ' + 'already declared as "final"' + ), + ) # Returns true if we are at a new block, and it is directly # inside of a namespace. 
def IsBlockInNameSpace(nesting_state, is_forward_declaration): - """Checks that the new block is directly in a namespace. - - Args: - nesting_state: The _NestingState object that contains info about our state. - is_forward_declaration: If the class is a forward declared class. - Returns: - Whether or not the new block is directly in a namespace. - """ - if is_forward_declaration: - return len(nesting_state.stack) >= 1 and ( - isinstance(nesting_state.stack[-1], _NamespaceInfo)) - - if len(nesting_state.stack) >= 1: - if isinstance(nesting_state.stack[-1], _NamespaceInfo): - return True - elif (len(nesting_state.stack) > 1 and - isinstance(nesting_state.previous_stack_top, _NamespaceInfo) and - isinstance(nesting_state.stack[-2], _NamespaceInfo)): - return True - return False - - -def ShouldCheckNamespaceIndentation(nesting_state, is_namespace_indent_item, - raw_lines_no_comments, linenum): - """This method determines if we should apply our namespace indentation check. - - Args: - nesting_state: The current nesting state. - is_namespace_indent_item: If we just put a new class on the stack, True. - If the top of the stack is not a class, or we did not recently - add the class, False. - raw_lines_no_comments: The lines without the comments. - linenum: The current line number we are processing. - - Returns: - True if we should apply our namespace indentation check. Currently, it - only works for classes and namespaces inside of a namespace. - """ - - is_forward_declaration = IsForwardClassDeclaration(raw_lines_no_comments, - linenum) - - if not (is_namespace_indent_item or is_forward_declaration): - return False + """Checks that the new block is directly in a namespace. - # If we are in a macro, we do not want to check the namespace indentation. - if IsMacroDefinition(raw_lines_no_comments, linenum): + Args: + nesting_state: The _NestingState object that contains info about our state. + is_forward_declaration: If the class is a forward declared class. 
+ Returns: + Whether or not the new block is directly in a namespace. + """ + if is_forward_declaration: + return len(nesting_state.stack) >= 1 and ( + isinstance(nesting_state.stack[-1], _NamespaceInfo) + ) + + if len(nesting_state.stack) >= 1: + if isinstance(nesting_state.stack[-1], _NamespaceInfo): + return True + elif ( + len(nesting_state.stack) > 1 + and isinstance(nesting_state.previous_stack_top, _NamespaceInfo) + and isinstance(nesting_state.stack[-2], _NamespaceInfo) + ): + return True return False - return IsBlockInNameSpace(nesting_state, is_forward_declaration) + +def ShouldCheckNamespaceIndentation( + nesting_state, is_namespace_indent_item, raw_lines_no_comments, linenum +): + """This method determines if we should apply our namespace indentation check. + + Args: + nesting_state: The current nesting state. + is_namespace_indent_item: If we just put a new class on the stack, True. + If the top of the stack is not a class, or we did not recently + add the class, False. + raw_lines_no_comments: The lines without the comments. + linenum: The current line number we are processing. + + Returns: + True if we should apply our namespace indentation check. Currently, it + only works for classes and namespaces inside of a namespace. + """ + + is_forward_declaration = IsForwardClassDeclaration(raw_lines_no_comments, linenum) + + if not (is_namespace_indent_item or is_forward_declaration): + return False + + # If we are in a macro, we do not want to check the namespace indentation. + if IsMacroDefinition(raw_lines_no_comments, linenum): + return False + + return IsBlockInNameSpace(nesting_state, is_forward_declaration) # Call this method if the line is directly inside of a namespace. # If the line above is blank (excluding comments) or the start of # an inner namespace, it cannot be indented. 
-def CheckItemIndentationInNamespace(filename, raw_lines_no_comments, linenum, - error): - # JMM: clang-format and cpplint disagree what "indenting in a - # namespace means - return - line = raw_lines_no_comments[linenum] - if re.match(r'^\s+', line): - error(filename, linenum, 'whitespace/indent_namespace', 4, - 'Do not indent within a namespace.') - - -def ProcessLine(filename, file_extension, clean_lines, line, - include_state, function_state, nesting_state, error, - extra_check_functions=None): - """Processes a single line in the file. - - Args: - filename: Filename of the file that is being processed. - file_extension: The extension (dot not included) of the file. - clean_lines: An array of strings, each representing a line of the file, - with comments stripped. - line: Number of line being processed. - include_state: An _IncludeState instance in which the headers are inserted. - function_state: A _FunctionState instance which counts function lines, etc. - nesting_state: A NestingState instance which maintains information about - the current stack of nested blocks being parsed. - error: A callable to which errors are reported, which takes 4 arguments: - filename, line number, error level, and message - extra_check_functions: An array of additional check functions that will be - run on each source line. 
Each function takes 4 - arguments: filename, clean_lines, line, error - """ - raw_lines = clean_lines.raw_lines - ParseNolintSuppressions(filename, raw_lines[line], line, error) - nesting_state.Update(filename, clean_lines, line, error) - CheckForNamespaceIndentation(filename, nesting_state, clean_lines, line, - error) - if nesting_state.InAsmBlock(): return - CheckForFunctionLengths(filename, clean_lines, line, function_state, error) - CheckForMultilineCommentsAndStrings(filename, clean_lines, line, error) - CheckStyle(filename, clean_lines, line, file_extension, nesting_state, error) - CheckLanguage(filename, clean_lines, line, file_extension, include_state, - nesting_state, error) - CheckForNonConstReference(filename, clean_lines, line, nesting_state, error) - CheckForNonStandardConstructs(filename, clean_lines, line, - nesting_state, error) - CheckVlogArguments(filename, clean_lines, line, error) - CheckPosixThreading(filename, clean_lines, line, error) - CheckInvalidIncrement(filename, clean_lines, line, error) - CheckMakePairUsesDeduction(filename, clean_lines, line, error) - CheckRedundantVirtual(filename, clean_lines, line, error) - CheckRedundantOverrideOrFinal(filename, clean_lines, line, error) - if extra_check_functions: - for check_fn in extra_check_functions: - check_fn(filename, clean_lines, line, error) +def CheckItemIndentationInNamespace(filename, raw_lines_no_comments, linenum, error): + # JMM: clang-format and cpplint disagree what "indenting in a + # namespace means + return + line = raw_lines_no_comments[linenum] + if re.match(r"^\s+", line): + error( + filename, + linenum, + "whitespace/indent_namespace", + 4, + "Do not indent within a namespace.", + ) + + +def ProcessLine( + filename, + file_extension, + clean_lines, + line, + include_state, + function_state, + nesting_state, + error, + extra_check_functions=None, +): + """Processes a single line in the file. + + Args: + filename: Filename of the file that is being processed. 
+ file_extension: The extension (dot not included) of the file. + clean_lines: An array of strings, each representing a line of the file, + with comments stripped. + line: Number of line being processed. + include_state: An _IncludeState instance in which the headers are inserted. + function_state: A _FunctionState instance which counts function lines, etc. + nesting_state: A NestingState instance which maintains information about + the current stack of nested blocks being parsed. + error: A callable to which errors are reported, which takes 4 arguments: + filename, line number, error level, and message + extra_check_functions: An array of additional check functions that will be + run on each source line. Each function takes 4 + arguments: filename, clean_lines, line, error + """ + raw_lines = clean_lines.raw_lines + ParseNolintSuppressions(filename, raw_lines[line], line, error) + nesting_state.Update(filename, clean_lines, line, error) + CheckForNamespaceIndentation(filename, nesting_state, clean_lines, line, error) + if nesting_state.InAsmBlock(): + return + CheckForFunctionLengths(filename, clean_lines, line, function_state, error) + CheckForMultilineCommentsAndStrings(filename, clean_lines, line, error) + CheckStyle(filename, clean_lines, line, file_extension, nesting_state, error) + CheckLanguage( + filename, clean_lines, line, file_extension, include_state, nesting_state, error + ) + CheckForNonConstReference(filename, clean_lines, line, nesting_state, error) + CheckForNonStandardConstructs(filename, clean_lines, line, nesting_state, error) + CheckVlogArguments(filename, clean_lines, line, error) + CheckPosixThreading(filename, clean_lines, line, error) + CheckInvalidIncrement(filename, clean_lines, line, error) + CheckMakePairUsesDeduction(filename, clean_lines, line, error) + CheckRedundantVirtual(filename, clean_lines, line, error) + CheckRedundantOverrideOrFinal(filename, clean_lines, line, error) + if extra_check_functions: + for check_fn in 
extra_check_functions: + check_fn(filename, clean_lines, line, error) def FlagCxxHeaders(filename, clean_lines, linenum, error): - """Flag C++ headers that the styleguide restricts. - - Args: - filename: The name of the current file. - clean_lines: A CleansedLines instance containing the file. - linenum: The number of the line to check. - error: The function to call with any errors found. - """ - line = clean_lines.elided[linenum] - - include = re.match(r'\s*#\s*include\s+[<"]([^<"]+)[">]', line) - - # Flag unapproved C++11 headers. - if include and include.group(1) in ('cfenv', - 'fenv.h', - 'ratio', - ): - error(filename, linenum, 'build/c++11', 5, - f"<{include.group(1)}> is an unapproved C++11 header.") - - # filesystem is the only unapproved C++17 header - if include and include.group(1) == 'filesystem': - error(filename, linenum, 'build/c++17', 5, - " is an unapproved C++17 header.") - - -def ProcessFileData(filename, file_extension, lines, error, - extra_check_functions=None): - """Performs lint checks and reports any errors to the given error function. - - Args: - filename: Filename of the file that is being processed. - file_extension: The extension (dot not included) of the file. - lines: An array of strings, each representing a line of the file, with the - last element being empty if the file is terminated with a newline. - error: A callable to which errors are reported, which takes 4 arguments: - filename, line number, error level, and message - extra_check_functions: An array of additional check functions that will be - run on each source line. 
Each function takes 4 - arguments: filename, clean_lines, line, error - """ - lines = (['// marker so line numbers and indices both start at 1'] + lines + - ['// marker so line numbers end in a known way']) - - include_state = _IncludeState() - function_state = _FunctionState() - nesting_state = NestingState() - - ResetNolintSuppressions() - - CheckForCopyright(filename, lines, error) - ProcessGlobalSuppressions(lines) - RemoveMultiLineComments(filename, lines, error) - clean_lines = CleansedLines(lines) - - if IsHeaderExtension(file_extension): - CheckForHeaderGuard(filename, clean_lines, error) - - for line in range(clean_lines.NumLines()): - ProcessLine(filename, file_extension, clean_lines, line, - include_state, function_state, nesting_state, error, - extra_check_functions) - FlagCxxHeaders(filename, clean_lines, line, error) - if _error_suppressions.HasOpenBlock(): - error(filename, _error_suppressions.GetOpenBlockStart(), 'readability/nolint', 5, - 'NONLINT block never ended') - - CheckForIncludeWhatYouUse(filename, clean_lines, include_state, error) - - # Check that the .cc file has included its header if it exists. - if _IsSourceExtension(file_extension): - CheckHeaderFileIncluded(filename, include_state, error) - - # We check here rather than inside ProcessLine so that we see raw - # lines rather than "cleaned" lines. - CheckForBadCharacters(filename, lines, error) - - CheckForNewlineAtEOF(filename, lines, error) + """Flag C++ headers that the styleguide restricts. -def ProcessConfigOverrides(filename): - """ Loads the configuration files and processes the config overrides. + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + error: The function to call with any errors found. + """ + line = clean_lines.elided[linenum] - Args: - filename: The name of the file being processed by the linter. 
+ include = re.match(r'\s*#\s*include\s+[<"]([^<"]+)[">]', line) + + # Flag unapproved C++11 headers. + if include and include.group(1) in ( + "cfenv", + "fenv.h", + "ratio", + ): + error( + filename, + linenum, + "build/c++11", + 5, + f"<{include.group(1)}> is an unapproved C++11 header.", + ) + + # filesystem is the only unapproved C++17 header + if include and include.group(1) == "filesystem": + error( + filename, + linenum, + "build/c++17", + 5, + " is an unapproved C++17 header.", + ) + + +def ProcessFileData(filename, file_extension, lines, error, extra_check_functions=None): + """Performs lint checks and reports any errors to the given error function. + + Args: + filename: Filename of the file that is being processed. + file_extension: The extension (dot not included) of the file. + lines: An array of strings, each representing a line of the file, with the + last element being empty if the file is terminated with a newline. + error: A callable to which errors are reported, which takes 4 arguments: + filename, line number, error level, and message + extra_check_functions: An array of additional check functions that will be + run on each source line. Each function takes 4 + arguments: filename, clean_lines, line, error + """ + lines = ( + ["// marker so line numbers and indices both start at 1"] + + lines + + ["// marker so line numbers end in a known way"] + ) - Returns: - False if the current |filename| should not be processed further. 
- """ + include_state = _IncludeState() + function_state = _FunctionState() + nesting_state = NestingState() + + ResetNolintSuppressions() + + CheckForCopyright(filename, lines, error) + ProcessGlobalSuppressions(lines) + RemoveMultiLineComments(filename, lines, error) + clean_lines = CleansedLines(lines) + + if IsHeaderExtension(file_extension): + CheckForHeaderGuard(filename, clean_lines, error) + + for line in range(clean_lines.NumLines()): + ProcessLine( + filename, + file_extension, + clean_lines, + line, + include_state, + function_state, + nesting_state, + error, + extra_check_functions, + ) + FlagCxxHeaders(filename, clean_lines, line, error) + if _error_suppressions.HasOpenBlock(): + error( + filename, + _error_suppressions.GetOpenBlockStart(), + "readability/nolint", + 5, + "NONLINT block never ended", + ) + + CheckForIncludeWhatYouUse(filename, clean_lines, include_state, error) + + # Check that the .cc file has included its header if it exists. + if _IsSourceExtension(file_extension): + CheckHeaderFileIncluded(filename, include_state, error) + + # We check here rather than inside ProcessLine so that we see raw + # lines rather than "cleaned" lines. + CheckForBadCharacters(filename, lines, error) + + CheckForNewlineAtEOF(filename, lines, error) - abs_filename = os.path.abspath(filename) - cfg_filters = [] - keep_looking = True - while keep_looking: - abs_path, base_name = os.path.split(abs_filename) - if not base_name: - break # Reached the root directory. - cfg_file = os.path.join(abs_path, _config_filename) - abs_filename = abs_path - if not os.path.isfile(cfg_file): - continue +def ProcessConfigOverrides(filename): + """Loads the configuration files and processes the config overrides. - try: - with codecs.open(cfg_file, 'r', 'utf8', 'replace') as file_handle: - for line in file_handle: - line, _, _ = line.partition('#') # Remove comments. - if not line.strip(): + Args: + filename: The name of the file being processed by the linter. 
+ + Returns: + False if the current |filename| should not be processed further. + """ + + abs_filename = os.path.abspath(filename) + cfg_filters = [] + keep_looking = True + while keep_looking: + abs_path, base_name = os.path.split(abs_filename) + if not base_name: + break # Reached the root directory. + + cfg_file = os.path.join(abs_path, _config_filename) + abs_filename = abs_path + if not os.path.isfile(cfg_file): continue - name, _, val = line.partition('=') - name = name.strip() - val = val.strip() - if name == 'set noparent': - keep_looking = False - elif name == 'filter': - cfg_filters.append(val) - elif name == 'exclude_files': - # When matching exclude_files pattern, use the base_name of - # the current file name or the directory name we are processing. - # For example, if we are checking for lint errors in /foo/bar/baz.cc - # and we found the .cfg file at /foo/CPPLINT.cfg, then the config - # file's "exclude_files" filter is meant to be checked against "bar" - # and not "baz" nor "bar/baz.cc". - if base_name: - pattern = re.compile(val) - if pattern.match(base_name): - if _cpplint_state.quiet: - # Suppress "Ignoring file" warning when using --quiet. - return False - _cpplint_state.PrintInfo(f'Ignoring "{filename}": file excluded by "{cfg_file}". ' - 'File path component "%s" matches ' - 'pattern "%s"\n' % - (base_name, val)) - return False - elif name == 'linelength': - global _line_length - try: - _line_length = int(val) - except ValueError: - _cpplint_state.PrintError('Line length must be numeric.') - elif name == 'extensions': - ProcessExtensionsOption(val) - elif name == 'root': - global _root - # root directories are specified relative to CPPLINT.cfg dir. 
- _root = os.path.join(os.path.dirname(cfg_file), val) - elif name == 'headers': - ProcessHppHeadersOption(val) - elif name == 'includeorder': - ProcessIncludeOrderOption(val) - else: + try: + with codecs.open(cfg_file, "r", "utf8", "replace") as file_handle: + for line in file_handle: + line, _, _ = line.partition("#") # Remove comments. + if not line.strip(): + continue + + name, _, val = line.partition("=") + name = name.strip() + val = val.strip() + if name == "set noparent": + keep_looking = False + elif name == "filter": + cfg_filters.append(val) + elif name == "exclude_files": + # When matching exclude_files pattern, use the base_name of + # the current file name or the directory name we are processing. + # For example, if we are checking for lint errors in /foo/bar/baz.cc + # and we found the .cfg file at /foo/CPPLINT.cfg, then the config + # file's "exclude_files" filter is meant to be checked against "bar" + # and not "baz" nor "bar/baz.cc". + if base_name: + pattern = re.compile(val) + if pattern.match(base_name): + if _cpplint_state.quiet: + # Suppress "Ignoring file" warning when using --quiet. + return False + _cpplint_state.PrintInfo( + f'Ignoring "{filename}": file excluded by "{cfg_file}". ' + 'File path component "%s" matches ' + 'pattern "%s"\n' % (base_name, val) + ) + return False + elif name == "linelength": + global _line_length + try: + _line_length = int(val) + except ValueError: + _cpplint_state.PrintError("Line length must be numeric.") + elif name == "extensions": + ProcessExtensionsOption(val) + elif name == "root": + global _root + # root directories are specified relative to CPPLINT.cfg dir. 
+ _root = os.path.join(os.path.dirname(cfg_file), val) + elif name == "headers": + ProcessHppHeadersOption(val) + elif name == "includeorder": + ProcessIncludeOrderOption(val) + else: + _cpplint_state.PrintError( + f"Invalid configuration option ({name}) in file {cfg_file}\n" + ) + + except IOError: _cpplint_state.PrintError( - f'Invalid configuration option ({name}) in file {cfg_file}\n') - - except IOError: - _cpplint_state.PrintError( - f"Skipping config file '{cfg_file}': Can't open for reading\n") - keep_looking = False + f"Skipping config file '{cfg_file}': Can't open for reading\n" + ) + keep_looking = False - # Apply all the accumulated filters in reverse order (top-level directory - # config options having the least priority). - for cfg_filter in reversed(cfg_filters): - _AddFilters(cfg_filter) + # Apply all the accumulated filters in reverse order (top-level directory + # config options having the least priority). + for cfg_filter in reversed(cfg_filters): + _AddFilters(cfg_filter) - return True + return True def ProcessFile(filename, vlevel, extra_check_functions=None): - """Does google-lint on a single file. + """Does google-lint on a single file. + + Args: + filename: The name of the file to parse. - Args: - filename: The name of the file to parse. + vlevel: The level of errors to report. Every error of confidence + >= verbose_level will be reported. 0 is a good default. - vlevel: The level of errors to report. Every error of confidence - >= verbose_level will be reported. 0 is a good default. + extra_check_functions: An array of additional check functions that will be + run on each source line. Each function takes 4 + arguments: filename, clean_lines, line, error + """ - extra_check_functions: An array of additional check functions that will be - run on each source line. 
Each function takes 4 - arguments: filename, clean_lines, line, error - """ + _SetVerboseLevel(vlevel) + _BackupFilters() + old_errors = _cpplint_state.error_count - _SetVerboseLevel(vlevel) - _BackupFilters() - old_errors = _cpplint_state.error_count + if not ProcessConfigOverrides(filename): + _RestoreFilters() + return - if not ProcessConfigOverrides(filename): - _RestoreFilters() - return + lf_lines = [] + crlf_lines = [] + try: + # Support the UNIX convention of using "-" for stdin. Note that + # we are not opening the file with universal newline support + # (which codecs doesn't support anyway), so the resulting lines do + # contain trailing '\r' characters if we are reading a file that + # has CRLF endings. + # If after the split a trailing '\r' is present, it is removed + # below. + if filename == "-": + lines = ( + codecs.StreamReaderWriter( + sys.stdin, + codecs.getreader("utf8"), + codecs.getwriter("utf8"), + "replace", + ) + .read() + .split("\n") + ) + else: + with codecs.open(filename, "r", "utf8", "replace") as target_file: + lines = target_file.read().split("\n") + + # Remove trailing '\r'. + # The -1 accounts for the extra trailing blank line we get from split() + for linenum in range(len(lines) - 1): + if lines[linenum].endswith("\r"): + lines[linenum] = lines[linenum].rstrip("\r") + crlf_lines.append(linenum + 1) + else: + lf_lines.append(linenum + 1) + + except IOError: + # TODO: Maybe make this have an exit code of 2 after all is done + _cpplint_state.PrintError( + f"Skipping input '{filename}': Can't open for reading\n" + ) + _RestoreFilters() + return + + # Note, if no dot is found, this will give the entire filename as the ext. + file_extension = filename[filename.rfind(".") + 1 :] - lf_lines = [] - crlf_lines = [] - try: - # Support the UNIX convention of using "-" for stdin. 
Note that - # we are not opening the file with universal newline support - # (which codecs doesn't support anyway), so the resulting lines do - # contain trailing '\r' characters if we are reading a file that - # has CRLF endings. - # If after the split a trailing '\r' is present, it is removed - # below. - if filename == '-': - lines = codecs.StreamReaderWriter(sys.stdin, - codecs.getreader('utf8'), - codecs.getwriter('utf8'), - 'replace').read().split('\n') + # When reading from stdin, the extension is unknown, so no cpplint tests + # should rely on the extension. + if filename != "-" and file_extension not in GetAllExtensions(): + _cpplint_state.PrintError( + f"Ignoring {filename}; not a valid file name" + f' ({(", ".join(GetAllExtensions()))})\n' + ) else: - with codecs.open(filename, 'r', 'utf8', 'replace') as target_file: - lines = target_file.read().split('\n') - - # Remove trailing '\r'. - # The -1 accounts for the extra trailing blank line we get from split() - for linenum in range(len(lines) - 1): - if lines[linenum].endswith('\r'): - lines[linenum] = lines[linenum].rstrip('\r') - crlf_lines.append(linenum + 1) - else: - lf_lines.append(linenum + 1) - - except IOError: - # TODO: Maybe make this have an exit code of 2 after all is done - _cpplint_state.PrintError( - f"Skipping input '{filename}': Can't open for reading\n") + ProcessFileData(filename, file_extension, lines, Error, extra_check_functions) + + # If end-of-line sequences are a mix of LF and CR-LF, issue + # warnings on the lines with CR. + # + # Don't issue any warnings if all lines are uniformly LF or CR-LF, + # since critique can handle these just fine, and the style guide + # doesn't dictate a particular end of line sequence. + # + # We can't depend on os.linesep to determine what the desired + # end-of-line sequence should be, since that will return the + # server-side end-of-line sequence. + if lf_lines and crlf_lines: + # Warn on every line with CR. 
An alternative approach might be to + # check whether the file is mostly CRLF or just LF, and warn on the + # minority, we bias toward LF here since most tools prefer LF. + for linenum in crlf_lines: + Error( + filename, + linenum, + "whitespace/newline", + 1, + "Unexpected \\r (^M) found; better to use only \\n", + ) + + # Suppress printing anything if --quiet was passed unless the error + # count has increased after processing this file. + if not _cpplint_state.quiet or old_errors != _cpplint_state.error_count: + _cpplint_state.PrintInfo(f"Done processing {filename}\n") _RestoreFilters() - return - # Note, if no dot is found, this will give the entire filename as the ext. - file_extension = filename[filename.rfind('.') + 1:] - # When reading from stdin, the extension is unknown, so no cpplint tests - # should rely on the extension. - if filename != '-' and file_extension not in GetAllExtensions(): - _cpplint_state.PrintError(f'Ignoring {filename}; not a valid file name' - f' ({(", ".join(GetAllExtensions()))})\n') - else: - ProcessFileData(filename, file_extension, lines, Error, - extra_check_functions) +def PrintUsage(message): + """Prints a brief usage string and exits, optionally with an error message. - # If end-of-line sequences are a mix of LF and CR-LF, issue - # warnings on the lines with CR. - # - # Don't issue any warnings if all lines are uniformly LF or CR-LF, - # since critique can handle these just fine, and the style guide - # doesn't dictate a particular end of line sequence. - # - # We can't depend on os.linesep to determine what the desired - # end-of-line sequence should be, since that will return the - # server-side end-of-line sequence. - if lf_lines and crlf_lines: - # Warn on every line with CR. An alternative approach might be to - # check whether the file is mostly CRLF or just LF, and warn on the - # minority, we bias toward LF here since most tools prefer LF. 
- for linenum in crlf_lines: - Error(filename, linenum, 'whitespace/newline', 1, - 'Unexpected \\r (^M) found; better to use only \\n') - - # Suppress printing anything if --quiet was passed unless the error - # count has increased after processing this file. - if not _cpplint_state.quiet or old_errors != _cpplint_state.error_count: - _cpplint_state.PrintInfo(f'Done processing {filename}\n') - _RestoreFilters() + Args: + message: The optional error message. + """ + sys.stderr.write( + _USAGE + % ( + sorted(list(GetAllExtensions())), + ",".join(sorted(list(GetAllExtensions()))), + sorted(GetHeaderExtensions()), + ",".join(sorted(GetHeaderExtensions())), + ) + ) + if message: + sys.exit("\nFATAL ERROR: " + message) + else: + sys.exit(0) -def PrintUsage(message): - """Prints a brief usage string and exits, optionally with an error message. - - Args: - message: The optional error message. - """ - sys.stderr.write(_USAGE % (sorted(list(GetAllExtensions())), - ','.join(sorted(list(GetAllExtensions()))), - sorted(GetHeaderExtensions()), - ','.join(sorted(GetHeaderExtensions())))) - - if message: - sys.exit('\nFATAL ERROR: ' + message) - else: - sys.exit(0) def PrintVersion(): - sys.stdout.write('Cpplint fork (https://github.com/cpplint/cpplint)\n') - sys.stdout.write('cpplint ' + __VERSION__ + '\n') - sys.stdout.write('Python ' + sys.version + '\n') - sys.exit(0) + sys.stdout.write("Cpplint fork (https://github.com/cpplint/cpplint)\n") + sys.stdout.write("cpplint " + __VERSION__ + "\n") + sys.stdout.write("Python " + sys.version + "\n") + sys.exit(0) + def PrintCategories(): - """Prints a list of all the error-categories used by error messages. + """Prints a list of all the error-categories used by error messages. - These are the categories used to filter messages via --filter. - """ - sys.stderr.write(''.join(f' {cat}\n' for cat in _ERROR_CATEGORIES)) - sys.exit(0) + These are the categories used to filter messages via --filter. 
+ """ + sys.stderr.write("".join(f" {cat}\n" for cat in _ERROR_CATEGORIES)) + sys.exit(0) def ParseArguments(args): - """Parses the command line arguments. - - This may set the output format and verbosity level as side-effects. - - Args: - args: The command line arguments: - - Returns: - The list of filenames to lint. - """ - try: - (opts, filenames) = getopt.getopt(args, '', ['help', 'output=', 'verbose=', - 'v=', - 'version', - 'counting=', - 'filter=', - 'root=', - 'repository=', - 'linelength=', - 'extensions=', - 'exclude=', - 'recursive', - 'headers=', - 'includeorder=', - 'config=', - 'quiet']) - except getopt.GetoptError: - PrintUsage('Invalid arguments.') - - verbosity = _VerboseLevel() - output_format = _OutputFormat() - filters = '' - quiet = _Quiet() - counting_style = '' - recursive = False - - for (opt, val) in opts: - if opt == '--help': - PrintUsage(None) - if opt == '--version': - PrintVersion() - elif opt == '--output': - if val not in ('emacs', 'vs7', 'eclipse', 'junit', 'sed', 'gsed'): - PrintUsage('The only allowed output formats are emacs, vs7, eclipse ' - 'sed, gsed and junit.') - output_format = val - elif opt == '--quiet': - quiet = True - elif opt == '--verbose' or opt == '--v': - verbosity = int(val) - elif opt == '--filter': - filters = val - if not filters: - PrintCategories() - elif opt == '--counting': - if val not in ('total', 'toplevel', 'detailed'): - PrintUsage('Valid counting options are total, toplevel, and detailed') - counting_style = val - elif opt == '--root': - global _root - _root = val - elif opt == '--repository': - global _repository - _repository = val - elif opt == '--linelength': - global _line_length - try: - _line_length = int(val) - except ValueError: - PrintUsage('Line length must be digits.') - elif opt == '--exclude': - global _excludes - if not _excludes: - _excludes = set() - _excludes.update(glob.glob(val)) - elif opt == '--extensions': - ProcessExtensionsOption(val) - elif opt == '--headers': - 
ProcessHppHeadersOption(val) - elif opt == '--recursive': - recursive = True - elif opt == '--includeorder': - ProcessIncludeOrderOption(val) - elif opt == '--config': - global _config_filename - _config_filename = val - if os.path.basename(_config_filename) != _config_filename: - PrintUsage('Config file name must not include directory components.') - - if not filenames: - PrintUsage('No files were specified.') - - if recursive: - filenames = _ExpandDirectories(filenames) - - if _excludes: - filenames = _FilterExcludedFiles(filenames) - - _SetOutputFormat(output_format) - _SetQuiet(quiet) - _SetVerboseLevel(verbosity) - _SetFilters(filters) - _SetCountingStyle(counting_style) - - filenames.sort() - return filenames + """Parses the command line arguments. + + This may set the output format and verbosity level as side-effects. + + Args: + args: The command line arguments: + + Returns: + The list of filenames to lint. + """ + try: + (opts, filenames) = getopt.getopt( + args, + "", + [ + "help", + "output=", + "verbose=", + "v=", + "version", + "counting=", + "filter=", + "root=", + "repository=", + "linelength=", + "extensions=", + "exclude=", + "recursive", + "headers=", + "includeorder=", + "config=", + "quiet", + ], + ) + except getopt.GetoptError: + PrintUsage("Invalid arguments.") + + verbosity = _VerboseLevel() + output_format = _OutputFormat() + filters = "" + quiet = _Quiet() + counting_style = "" + recursive = False + + for opt, val in opts: + if opt == "--help": + PrintUsage(None) + if opt == "--version": + PrintVersion() + elif opt == "--output": + if val not in ("emacs", "vs7", "eclipse", "junit", "sed", "gsed"): + PrintUsage( + "The only allowed output formats are emacs, vs7, eclipse " + "sed, gsed and junit." 
+ ) + output_format = val + elif opt == "--quiet": + quiet = True + elif opt == "--verbose" or opt == "--v": + verbosity = int(val) + elif opt == "--filter": + filters = val + if not filters: + PrintCategories() + elif opt == "--counting": + if val not in ("total", "toplevel", "detailed"): + PrintUsage("Valid counting options are total, toplevel, and detailed") + counting_style = val + elif opt == "--root": + global _root + _root = val + elif opt == "--repository": + global _repository + _repository = val + elif opt == "--linelength": + global _line_length + try: + _line_length = int(val) + except ValueError: + PrintUsage("Line length must be digits.") + elif opt == "--exclude": + global _excludes + if not _excludes: + _excludes = set() + _excludes.update(glob.glob(val)) + elif opt == "--extensions": + ProcessExtensionsOption(val) + elif opt == "--headers": + ProcessHppHeadersOption(val) + elif opt == "--recursive": + recursive = True + elif opt == "--includeorder": + ProcessIncludeOrderOption(val) + elif opt == "--config": + global _config_filename + _config_filename = val + if os.path.basename(_config_filename) != _config_filename: + PrintUsage("Config file name must not include directory components.") + + if not filenames: + PrintUsage("No files were specified.") + + if recursive: + filenames = _ExpandDirectories(filenames) + + if _excludes: + filenames = _FilterExcludedFiles(filenames) + + _SetOutputFormat(output_format) + _SetQuiet(quiet) + _SetVerboseLevel(verbosity) + _SetFilters(filters) + _SetCountingStyle(counting_style) + + filenames.sort() + return filenames + def _ParseFilterSelector(parameter): - """Parses the given command line parameter for file- and line-specific - exclusions. - readability/casting:file.cpp - readability/casting:file.cpp:43 - - Args: - parameter: The parameter value of --filter - - Returns: - [category, filename, line]. - Category is always given. - Filename is either a filename or empty if all files are meant. 
- Line is either a line in filename or -1 if all lines are meant. - """ - colon_pos = parameter.find(":") - if colon_pos == -1: - return parameter, "", -1 - category = parameter[:colon_pos] - second_colon_pos = parameter.find(":", colon_pos + 1) - if second_colon_pos == -1: - return category, parameter[colon_pos + 1:], -1 - else: - return category, parameter[colon_pos + 1: second_colon_pos], \ - int(parameter[second_colon_pos + 1:]) + """Parses the given command line parameter for file- and line-specific + exclusions. + readability/casting:file.cpp + readability/casting:file.cpp:43 + + Args: + parameter: The parameter value of --filter + + Returns: + [category, filename, line]. + Category is always given. + Filename is either a filename or empty if all files are meant. + Line is either a line in filename or -1 if all lines are meant. + """ + colon_pos = parameter.find(":") + if colon_pos == -1: + return parameter, "", -1 + category = parameter[:colon_pos] + second_colon_pos = parameter.find(":", colon_pos + 1) + if second_colon_pos == -1: + return category, parameter[colon_pos + 1 :], -1 + else: + return ( + category, + parameter[colon_pos + 1 : second_colon_pos], + int(parameter[second_colon_pos + 1 :]), + ) + def _ExpandDirectories(filenames): - """Searches a list of filenames and replaces directories in the list with - all files descending from those directories. Files with extensions not in - the valid extensions list are excluded. - - Args: - filenames: A list of files or directories - - Returns: - A list of all files that are members of filenames or descended from a - directory in filenames - """ - expanded = set() - for filename in filenames: - if not os.path.isdir(filename): - expanded.add(filename) - continue - - for root, _, files in os.walk(filename): - for loopfile in files: - fullname = os.path.join(root, loopfile) - if fullname.startswith('.' + os.path.sep): - fullname = fullname[len('.' 
+ os.path.sep):] - expanded.add(fullname) - - filtered = [] - for filename in expanded: - if os.path.splitext(filename)[1][1:] in GetAllExtensions(): - filtered.append(filename) - return filtered + """Searches a list of filenames and replaces directories in the list with + all files descending from those directories. Files with extensions not in + the valid extensions list are excluded. + + Args: + filenames: A list of files or directories + + Returns: + A list of all files that are members of filenames or descended from a + directory in filenames + """ + expanded = set() + for filename in filenames: + if not os.path.isdir(filename): + expanded.add(filename) + continue + + for root, _, files in os.walk(filename): + for loopfile in files: + fullname = os.path.join(root, loopfile) + if fullname.startswith("." + os.path.sep): + fullname = fullname[len("." + os.path.sep) :] + expanded.add(fullname) + + filtered = [] + for filename in expanded: + if os.path.splitext(filename)[1][1:] in GetAllExtensions(): + filtered.append(filename) + return filtered + def _FilterExcludedFiles(fnames): - """Filters out files listed in the --exclude command line switch. File paths - in the switch are evaluated relative to the current working directory - """ - exclude_paths = [os.path.abspath(f) for f in _excludes] - # because globbing does not work recursively, exclude all subpath of all excluded entries - return [f for f in fnames - if not any(e for e in exclude_paths - if _IsParentOrSame(e, os.path.abspath(f)))] + """Filters out files listed in the --exclude command line switch. 
File paths + in the switch are evaluated relative to the current working directory + """ + exclude_paths = [os.path.abspath(f) for f in _excludes] + # because globbing does not work recursively, exclude all subpath of all excluded entries + return [ + f + for f in fnames + if not any(e for e in exclude_paths if _IsParentOrSame(e, os.path.abspath(f))) + ] + def _IsParentOrSame(parent, child): - """Return true if child is subdirectory of parent. - Assumes both paths are absolute and don't contain symlinks. - """ - parent = os.path.normpath(parent) - child = os.path.normpath(child) - if parent == child: - return True + """Return true if child is subdirectory of parent. + Assumes both paths are absolute and don't contain symlinks. + """ + parent = os.path.normpath(parent) + child = os.path.normpath(child) + if parent == child: + return True + + prefix = os.path.commonprefix([parent, child]) + if prefix != parent: + return False + # Note: os.path.commonprefix operates on character basis, so + # take extra care of situations like '/foo/ba' and '/foo/bar/baz' + child_suffix = child[len(prefix) :] + child_suffix = child_suffix.lstrip(os.sep) + return child == os.path.join(prefix, child_suffix) - prefix = os.path.commonprefix([parent, child]) - if prefix != parent: - return False - # Note: os.path.commonprefix operates on character basis, so - # take extra care of situations like '/foo/ba' and '/foo/bar/baz' - child_suffix = child[len(prefix):] - child_suffix = child_suffix.lstrip(os.sep) - return child == os.path.join(prefix, child_suffix) def main(): - filenames = ParseArguments(sys.argv[1:]) - backup_err = sys.stderr - try: - # Change stderr to write with replacement characters so we don't die - # if we try to print something containing non-ASCII characters. 
- sys.stderr = codecs.StreamReader(sys.stderr, 'replace') - - _cpplint_state.ResetErrorCounts() - for filename in filenames: - ProcessFile(filename, _cpplint_state.verbose_level) - # If --quiet is passed, suppress printing error count unless there are errors. - if not _cpplint_state.quiet or _cpplint_state.error_count > 0: - _cpplint_state.PrintErrorCounts() + filenames = ParseArguments(sys.argv[1:]) + backup_err = sys.stderr + try: + # Change stderr to write with replacement characters so we don't die + # if we try to print something containing non-ASCII characters. + sys.stderr = codecs.StreamReader(sys.stderr, "replace") + + _cpplint_state.ResetErrorCounts() + for filename in filenames: + ProcessFile(filename, _cpplint_state.verbose_level) + # If --quiet is passed, suppress printing error count unless there are errors. + if not _cpplint_state.quiet or _cpplint_state.error_count > 0: + _cpplint_state.PrintErrorCounts() - if _cpplint_state.output_format == 'junit': - sys.stderr.write(_cpplint_state.FormatJUnitXML()) + if _cpplint_state.output_format == "junit": + sys.stderr.write(_cpplint_state.FormatJUnitXML()) - finally: - sys.stderr = backup_err + finally: + sys.stderr = backup_err - sys.exit(_cpplint_state.error_count > 0) + sys.exit(_cpplint_state.error_count > 0) -if __name__ == '__main__': - main() +if __name__ == "__main__": + main() From 9250431282425cc70dc39f695a59341986a21932 Mon Sep 17 00:00:00 2001 From: Jonah Miller Date: Mon, 16 Sep 2024 15:36:38 -0600 Subject: [PATCH 31/37] linter --- src/interface/sparse_pack_base.cpp | 1 + src/outputs/parthenon_xdmf.cpp | 1 + 2 files changed, 2 insertions(+) diff --git a/src/interface/sparse_pack_base.cpp b/src/interface/sparse_pack_base.cpp index 1266f24e1a04..2a7a5b70c41c 100644 --- a/src/interface/sparse_pack_base.cpp +++ b/src/interface/sparse_pack_base.cpp @@ -12,6 +12,7 @@ //======================================================================================== #include +#include #include #include #include 
diff --git a/src/outputs/parthenon_xdmf.cpp b/src/outputs/parthenon_xdmf.cpp index a5bdc34542b0..c360323a357a 100644 --- a/src/outputs/parthenon_xdmf.cpp +++ b/src/outputs/parthenon_xdmf.cpp @@ -30,6 +30,7 @@ // C++ #include #include +#include // Parthenon #include "basic_types.hpp" From f5c5d89b13585e9869b649833c4ccd25d335b684 Mon Sep 17 00:00:00 2001 From: Jonah Miller Date: Mon, 16 Sep 2024 16:00:23 -0600 Subject: [PATCH 32/37] does this make the linter happy --- src/outputs/output_utils.cpp | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/src/outputs/output_utils.cpp b/src/outputs/output_utils.cpp index 9995869b8138..e124289ac56a 100644 --- a/src/outputs/output_utils.cpp +++ b/src/outputs/output_utils.cpp @@ -15,6 +15,8 @@ // the public, perform publicly and display publicly, and to permit others to do so. //======================================================================================== +#include +#include #include #include #include @@ -280,10 +282,7 @@ void ComputeCoords(Mesh *pm, bool face, const IndexRange &ib, const IndexRange & } } -// TODO(JMM): may need to generalize this -std::size_t MPIPrefixSum(std::size_t local, std::size_t &tot_count) { - std::size_t out = 0; - tot_count = 0; +constexpr void CheckMPISizeT() { #ifdef MPI_PARALLEL // Need to use sizeof here because unsigned long long and unsigned // long are identical under the hood but registered as different @@ -291,8 +290,21 @@ std::size_t MPIPrefixSum(std::size_t local, std::size_t &tot_count) { static_assert(std::is_integral::value && !std::is_signed::value, "size_t is unsigned and integral"); - static_assert(sizeof(std::size_t) == sizeof(unsigned long long int), + static_assert(sizeof(std::size_t) == sizeof(unsigned long long int), // NOLINT "MPI_UNSIGNED_LONG_LONG same as size_t"); + +#endif +} + +// TODO(JMM): may need to generalize this +std::size_t MPIPrefixSum(std::size_t local, std::size_t &tot_count) { + std::size_t out = 0; + 
tot_count = 0; +#ifdef MPI_PARALLEL + // Need to use sizeof here because unsigned long long and unsigned + // long are identical under the hood but registered as different + // types + CheckMPISizeT(); std::vector buffer(Globals::nranks); MPI_Allgather(&local, 1, MPI_UNSIGNED_LONG_LONG, buffer.data(), 1, MPI_UNSIGNED_LONG_LONG, MPI_COMM_WORLD); @@ -307,19 +319,7 @@ std::size_t MPIPrefixSum(std::size_t local, std::size_t &tot_count) { #endif // MPI_PARALLEL return out; } -constexpr void CheckMPISizeT() { -#ifdef MPI_PARALLEL - // Need to use sizeof here because unsigned long long and unsigned - // long are identical under the hood but registered as different - // types - static_assert(std::is_integral::value && - !std::is_signed::value, - "size_t is unsigned and integral"); - static_assert(sizeof(std::size_t) == sizeof(unsigned long long int), - "MPI_UNSIGNED_LONG_LONG same as size_t"); -#endif -} std::size_t MPISum(std::size_t val) { #ifdef MPI_PARALLEL CheckMPISizeT(); From f03091bfc2163e99d2fb800ea20e00676648a873 Mon Sep 17 00:00:00 2001 From: Jonah Miller Date: Mon, 16 Sep 2024 16:58:46 -0600 Subject: [PATCH 33/37] make new linter happy finaly I hope --- example/advection/parthenon_app_inputs.cpp | 3 +++ example/calculate_pi/pi_driver.cpp | 1 + example/fine_advection/parthenon_app_inputs.cpp | 1 + example/kokkos_pi/kokkos_pi.cpp | 2 +- example/poisson/parthenon_app_inputs.cpp | 1 + example/poisson/poisson_driver.cpp | 1 + example/poisson/poisson_package.cpp | 1 + example/poisson_gmg/poisson_driver.cpp | 1 + example/poisson_gmg/poisson_package.cpp | 1 + example/sparse_advection/parthenon_app_inputs.cpp | 6 +++++- example/stochastic_subgrid/parthenon_app_inputs.cpp | 3 +++ src/amr_criteria/amr_criteria.cpp | 2 ++ src/amr_criteria/refinement_package.cpp | 1 + src/argument_parser.hpp | 1 + src/bvals/bvals.cpp | 1 + src/bvals/comms/bnd_info.cpp | 1 + src/bvals/comms/build_boundary_buffers.cpp | 1 + src/bvals/comms/tag_map.cpp | 4 +++- src/bvals/neighbor_block.cpp | 1 
+ src/driver/driver.cpp | 3 +++ src/interface/data_collection.cpp | 1 + src/interface/meshblock_data.cpp | 2 ++ src/interface/metadata.cpp | 3 +++ src/interface/sparse_pool.cpp | 2 ++ src/interface/state_descriptor.cpp | 2 ++ src/interface/swarm.cpp | 1 + src/interface/swarm_container.cpp | 2 ++ src/interface/swarm_device_context.hpp | 2 ++ src/interface/update.cpp | 1 + src/interface/variable.cpp | 2 ++ src/interface/variable_pack.hpp | 1 + src/mesh/mesh-amr_loadbalance.cpp | 2 ++ src/mesh/mesh-gmg.cpp | 2 ++ src/outputs/histogram.cpp | 1 + src/outputs/history.cpp | 1 + src/outputs/outputs.cpp | 1 + src/parameter_input.cpp | 1 + src/parthenon_manager.cpp | 2 ++ src/parthenon_manager.hpp | 1 + src/solvers/bicgstab_solver.hpp | 1 + src/solvers/mg_solver.hpp | 1 + src/tasks/tasks.cpp | 2 ++ src/utils/alias_method.cpp | 1 + src/utils/error_checking.cpp | 2 ++ src/utils/error_checking.hpp | 1 + src/utils/object_pool.hpp | 5 +++-- src/utils/string_utils.cpp | 2 ++ tst/unit/kokkos_abstraction.cpp | 1 + tst/unit/test_concepts_lite.cpp | 1 + tst/unit/test_forest.cpp | 4 ++++ tst/unit/test_index_split.cpp | 1 + tst/unit/test_logical_location.cpp | 5 +++++ tst/unit/test_meshblock_data_iterator.cpp | 2 ++ tst/unit/test_metadata.cpp | 3 +++ tst/unit/test_sparse_pack.cpp | 1 + tst/unit/test_unit_domain.cpp | 1 + tst/unit/test_unit_sort.cpp | 1 + tst/unit/test_upper_bound.cpp | 1 + 58 files changed, 97 insertions(+), 5 deletions(-) diff --git a/example/advection/parthenon_app_inputs.cpp b/example/advection/parthenon_app_inputs.cpp index e3d2a60fc132..3a8e918c11a7 100644 --- a/example/advection/parthenon_app_inputs.cpp +++ b/example/advection/parthenon_app_inputs.cpp @@ -11,6 +11,9 @@ // the public, perform publicly and display publicly, and to permit others to do so. 
//======================================================================================== +#include +#include +#include #include #include diff --git a/example/calculate_pi/pi_driver.cpp b/example/calculate_pi/pi_driver.cpp index d656d3bfdf3d..c964d794659d 100644 --- a/example/calculate_pi/pi_driver.cpp +++ b/example/calculate_pi/pi_driver.cpp @@ -13,6 +13,7 @@ // Standard Includes #include +#include #include #include #include diff --git a/example/fine_advection/parthenon_app_inputs.cpp b/example/fine_advection/parthenon_app_inputs.cpp index d11635e1fd57..ab970175e9cf 100644 --- a/example/fine_advection/parthenon_app_inputs.cpp +++ b/example/fine_advection/parthenon_app_inputs.cpp @@ -11,6 +11,7 @@ // the public, perform publicly and display publicly, and to permit others to do so. //======================================================================================== +#include #include #include diff --git a/example/kokkos_pi/kokkos_pi.cpp b/example/kokkos_pi/kokkos_pi.cpp index d3eb1d545852..49bc1167750f 100644 --- a/example/kokkos_pi/kokkos_pi.cpp +++ b/example/kokkos_pi/kokkos_pi.cpp @@ -47,7 +47,7 @@ // and using flat range and MDRange in Kokkos // -#include +#include #include #include diff --git a/example/poisson/parthenon_app_inputs.cpp b/example/poisson/parthenon_app_inputs.cpp index 2dd29551f350..d8dd0395247b 100644 --- a/example/poisson/parthenon_app_inputs.cpp +++ b/example/poisson/parthenon_app_inputs.cpp @@ -13,6 +13,7 @@ #include #include +#include #include diff --git a/example/poisson/poisson_driver.cpp b/example/poisson/poisson_driver.cpp index 030001069c5a..79984fe705c8 100644 --- a/example/poisson/poisson_driver.cpp +++ b/example/poisson/poisson_driver.cpp @@ -12,6 +12,7 @@ //======================================================================================== #include +#include #include #include #include diff --git a/example/poisson/poisson_package.cpp b/example/poisson/poisson_package.cpp index e3f5bf75d3ae..3f1376be9707 100644 --- 
a/example/poisson/poisson_package.cpp +++ b/example/poisson/poisson_package.cpp @@ -13,6 +13,7 @@ #include #include +#include #include #include #include diff --git a/example/poisson_gmg/poisson_driver.cpp b/example/poisson_gmg/poisson_driver.cpp index b995613da077..0eb378020a42 100644 --- a/example/poisson_gmg/poisson_driver.cpp +++ b/example/poisson_gmg/poisson_driver.cpp @@ -12,6 +12,7 @@ //======================================================================================== #include +#include #include #include #include diff --git a/example/poisson_gmg/poisson_package.cpp b/example/poisson_gmg/poisson_package.cpp index 1826bda428af..ebb21ba5daf1 100644 --- a/example/poisson_gmg/poisson_package.cpp +++ b/example/poisson_gmg/poisson_package.cpp @@ -16,6 +16,7 @@ #include #include #include +#include #include #include diff --git a/example/sparse_advection/parthenon_app_inputs.cpp b/example/sparse_advection/parthenon_app_inputs.cpp index 0f9730d7f718..3f4a3868665c 100644 --- a/example/sparse_advection/parthenon_app_inputs.cpp +++ b/example/sparse_advection/parthenon_app_inputs.cpp @@ -10,9 +10,13 @@ // the public, perform publicly and display publicly, and to permit others to do so. 
//======================================================================================== +#include +#include #include +#include #include #include +#include #include @@ -141,7 +145,7 @@ void PostStepDiagnosticsInLoop(Mesh *mesh, ParameterInput *pin, const SimTime &t } #ifdef MPI_PARALLEL - static_assert(sizeof(std::uint64_t) == sizeof(unsigned long long int), + static_assert(sizeof(std::uint64_t) == sizeof(unsigned long long int), // NOLINT "MPI_UNSIGNED_LONG_LONG same as uint64_t"); if (Globals::my_rank == 0) { PARTHENON_MPI_CHECK(MPI_Reduce(MPI_IN_PLACE, num_allocated.data(), n, MPI_INT, diff --git a/example/stochastic_subgrid/parthenon_app_inputs.cpp b/example/stochastic_subgrid/parthenon_app_inputs.cpp index b4a5df5fe866..f3ddac3cede7 100644 --- a/example/stochastic_subgrid/parthenon_app_inputs.cpp +++ b/example/stochastic_subgrid/parthenon_app_inputs.cpp @@ -11,6 +11,9 @@ // the public, perform publicly and display publicly, and to permit others to do so. //======================================================================================== +#include +#include +#include #include #include diff --git a/src/amr_criteria/amr_criteria.cpp b/src/amr_criteria/amr_criteria.cpp index a8cb19cd8218..41b2237d96ff 100644 --- a/src/amr_criteria/amr_criteria.cpp +++ b/src/amr_criteria/amr_criteria.cpp @@ -12,7 +12,9 @@ //======================================================================================== #include "amr_criteria/amr_criteria.hpp" +#include #include +#include #include "amr_criteria/refinement_package.hpp" #include "interface/meshblock_data.hpp" diff --git a/src/amr_criteria/refinement_package.cpp b/src/amr_criteria/refinement_package.cpp index 459877767d1d..81342b70695a 100644 --- a/src/amr_criteria/refinement_package.cpp +++ b/src/amr_criteria/refinement_package.cpp @@ -16,6 +16,7 @@ #include #include #include +#include #include #include "amr_criteria/amr_criteria.hpp" diff --git a/src/argument_parser.hpp b/src/argument_parser.hpp index 
60a6dd11ba41..8b79eaebc975 100644 --- a/src/argument_parser.hpp +++ b/src/argument_parser.hpp @@ -14,6 +14,7 @@ #ifndef ARGUMENT_PARSER_HPP_ #define ARGUMENT_PARSER_HPP_ +#include #include #include diff --git a/src/bvals/bvals.cpp b/src/bvals/bvals.cpp index 3581b6c0732f..37a4d4689916 100644 --- a/src/bvals/bvals.cpp +++ b/src/bvals/bvals.cpp @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include diff --git a/src/bvals/comms/bnd_info.cpp b/src/bvals/comms/bnd_info.cpp index 1505b56f956a..736992260913 100644 --- a/src/bvals/comms/bnd_info.cpp +++ b/src/bvals/comms/bnd_info.cpp @@ -16,6 +16,7 @@ //======================================================================================== #include +#include #include // debug #include #include diff --git a/src/bvals/comms/build_boundary_buffers.cpp b/src/bvals/comms/build_boundary_buffers.cpp index aac532d037e6..9a4e3b5c4048 100644 --- a/src/bvals/comms/build_boundary_buffers.cpp +++ b/src/bvals/comms/build_boundary_buffers.cpp @@ -22,6 +22,7 @@ #include #include #include +#include #include #include "bvals_in_one.hpp" diff --git a/src/bvals/comms/tag_map.cpp b/src/bvals/comms/tag_map.cpp index d288b4182cd6..d8a2b4f132eb 100644 --- a/src/bvals/comms/tag_map.cpp +++ b/src/bvals/comms/tag_map.cpp @@ -15,9 +15,11 @@ // the public, perform publicly and display publicly, and to permit others to do so. 
//======================================================================================== -#include "tag_map.hpp" +#include + #include "bnd_info.hpp" #include "bvals_utils.hpp" +#include "tag_map.hpp" #include "utils/loop_utils.hpp" namespace parthenon { diff --git a/src/bvals/neighbor_block.cpp b/src/bvals/neighbor_block.cpp index da9a730ea078..a66908c117ee 100644 --- a/src/bvals/neighbor_block.cpp +++ b/src/bvals/neighbor_block.cpp @@ -28,6 +28,7 @@ #include // runtime_error #include // c_str() #include +#include #include "globals.hpp" #include "mesh/forest/logical_location.hpp" diff --git a/src/driver/driver.cpp b/src/driver/driver.cpp index 85327ecbae3b..17b913e3a2ac 100644 --- a/src/driver/driver.cpp +++ b/src/driver/driver.cpp @@ -15,7 +15,10 @@ #include #include #include +#include #include +#include +#include #include "driver/driver.hpp" diff --git a/src/interface/data_collection.cpp b/src/interface/data_collection.cpp index f28305b5411a..310f000adaa8 100644 --- a/src/interface/data_collection.cpp +++ b/src/interface/data_collection.cpp @@ -11,6 +11,7 @@ // the public, perform publicly and display publicly, and to permit others to do so. 
//======================================================================================== +#include #include #include "interface/data_collection.hpp" diff --git a/src/interface/meshblock_data.cpp b/src/interface/meshblock_data.cpp index fcc2407eb574..69f99ec82fa5 100644 --- a/src/interface/meshblock_data.cpp +++ b/src/interface/meshblock_data.cpp @@ -15,9 +15,11 @@ #include #include +#include #include #include #include +#include #include #include #include diff --git a/src/interface/metadata.cpp b/src/interface/metadata.cpp index 2f72fb78269b..ebed9805fc91 100644 --- a/src/interface/metadata.cpp +++ b/src/interface/metadata.cpp @@ -13,8 +13,11 @@ #include "interface/metadata.hpp" +#include #include #include +#include +#include #include #include #include diff --git a/src/interface/sparse_pool.cpp b/src/interface/sparse_pool.cpp index 7890cff964de..12897c5659fe 100644 --- a/src/interface/sparse_pool.cpp +++ b/src/interface/sparse_pool.cpp @@ -12,6 +12,8 @@ //======================================================================================== #include +#include +#include #include "interface/sparse_pool.hpp" diff --git a/src/interface/state_descriptor.cpp b/src/interface/state_descriptor.cpp index e6d176dda55b..9116298e8bd5 100644 --- a/src/interface/state_descriptor.cpp +++ b/src/interface/state_descriptor.cpp @@ -13,11 +13,13 @@ #include #include +#include #include #include #include #include #include +#include #include #include "basic_types.hpp" diff --git a/src/interface/swarm.cpp b/src/interface/swarm.cpp index 8e715f1cb8db..6fef47d3fb5f 100644 --- a/src/interface/swarm.cpp +++ b/src/interface/swarm.cpp @@ -14,6 +14,7 @@ #include #include #include +#include #include #include diff --git a/src/interface/swarm_container.cpp b/src/interface/swarm_container.cpp index 6e62ee0e72b6..bd40fc2b9686 100644 --- a/src/interface/swarm_container.cpp +++ b/src/interface/swarm_container.cpp @@ -11,7 +11,9 @@ // the public, perform publicly and display publicly, and to permit 
others to do so. //======================================================================================== #include +#include #include +#include #include #include diff --git a/src/interface/swarm_device_context.hpp b/src/interface/swarm_device_context.hpp index ed958126f36b..1dbcf3383901 100644 --- a/src/interface/swarm_device_context.hpp +++ b/src/interface/swarm_device_context.hpp @@ -13,6 +13,8 @@ #ifndef INTERFACE_SWARM_DEVICE_CONTEXT_HPP_ #define INTERFACE_SWARM_DEVICE_CONTEXT_HPP_ +#include + #include "coordinates/coordinates.hpp" #include "utils/utils.hpp" diff --git a/src/interface/update.cpp b/src/interface/update.cpp index 6282490a1073..18ad871308d8 100644 --- a/src/interface/update.cpp +++ b/src/interface/update.cpp @@ -14,6 +14,7 @@ #include "interface/update.hpp" #include +#include #include "config.hpp" #include "coordinates/coordinates.hpp" diff --git a/src/interface/variable.cpp b/src/interface/variable.cpp index c5375b7aad4f..396edbd73cfe 100644 --- a/src/interface/variable.cpp +++ b/src/interface/variable.cpp @@ -13,8 +13,10 @@ #include "interface/variable.hpp" +#include #include #include +#include #include #include diff --git a/src/interface/variable_pack.hpp b/src/interface/variable_pack.hpp index fba75750f691..037731093ce1 100644 --- a/src/interface/variable_pack.hpp +++ b/src/interface/variable_pack.hpp @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include diff --git a/src/mesh/mesh-amr_loadbalance.cpp b/src/mesh/mesh-amr_loadbalance.cpp index fdb454fb2a34..d54167026066 100644 --- a/src/mesh/mesh-amr_loadbalance.cpp +++ b/src/mesh/mesh-amr_loadbalance.cpp @@ -29,6 +29,8 @@ #include #include #include +#include +#include #include "parthenon_mpi.hpp" diff --git a/src/mesh/mesh-gmg.cpp b/src/mesh/mesh-gmg.cpp index 71784b7d5a01..791449aa7acd 100644 --- a/src/mesh/mesh-gmg.cpp +++ b/src/mesh/mesh-gmg.cpp @@ -20,11 +20,13 @@ #include #include #include +#include #include #include #include #include #include +#include 
#include "parthenon_mpi.hpp" diff --git a/src/outputs/histogram.cpp b/src/outputs/histogram.cpp index 3abcc514775e..f983df0018e9 100644 --- a/src/outputs/histogram.cpp +++ b/src/outputs/histogram.cpp @@ -35,6 +35,7 @@ #include #include #include +#include #include #include #include diff --git a/src/outputs/history.cpp b/src/outputs/history.cpp index 7a882a44c61a..2e1310062d48 100644 --- a/src/outputs/history.cpp +++ b/src/outputs/history.cpp @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include diff --git a/src/outputs/outputs.cpp b/src/outputs/outputs.cpp index 24d528cadaf8..430ccdf3026c 100644 --- a/src/outputs/outputs.cpp +++ b/src/outputs/outputs.cpp @@ -67,6 +67,7 @@ #include #include #include +#include #include "coordinates/coordinates.hpp" #include "defs.hpp" diff --git a/src/parameter_input.cpp b/src/parameter_input.cpp index 45012e0912bd..dc6651e0b24c 100644 --- a/src/parameter_input.cpp +++ b/src/parameter_input.cpp @@ -57,6 +57,7 @@ #include #include #include +#include #include #include "globals.hpp" diff --git a/src/parthenon_manager.cpp b/src/parthenon_manager.cpp index 61396f42721c..b0ec6ac971ae 100644 --- a/src/parthenon_manager.cpp +++ b/src/parthenon_manager.cpp @@ -19,6 +19,8 @@ #include #include +#include +#include #include #include #include diff --git a/src/parthenon_manager.hpp b/src/parthenon_manager.hpp index d9cf3de1bf09..2f05f671b1b0 100644 --- a/src/parthenon_manager.hpp +++ b/src/parthenon_manager.hpp @@ -14,6 +14,7 @@ #ifndef PARTHENON_MANAGER_HPP_ #define PARTHENON_MANAGER_HPP_ +#include #include #include #include diff --git a/src/solvers/bicgstab_solver.hpp b/src/solvers/bicgstab_solver.hpp index caa594337b61..8632d0a68a67 100644 --- a/src/solvers/bicgstab_solver.hpp +++ b/src/solvers/bicgstab_solver.hpp @@ -13,6 +13,7 @@ #ifndef SOLVERS_BICGSTAB_SOLVER_HPP_ #define SOLVERS_BICGSTAB_SOLVER_HPP_ +#include #include #include #include diff --git a/src/solvers/mg_solver.hpp b/src/solvers/mg_solver.hpp index 
ee8cfff177ab..a68da46ee645 100644 --- a/src/solvers/mg_solver.hpp +++ b/src/solvers/mg_solver.hpp @@ -14,6 +14,7 @@ #define SOLVERS_MG_SOLVER_HPP_ #include +#include #include #include #include diff --git a/src/tasks/tasks.cpp b/src/tasks/tasks.cpp index 8d4cbd656d55..a231c43bf380 100644 --- a/src/tasks/tasks.cpp +++ b/src/tasks/tasks.cpp @@ -11,9 +11,11 @@ // the public, perform publicly and display publicly, and to permit others to do so. //======================================================================================== +#include #include #include #include +#include #include #if __has_include() diff --git a/src/utils/alias_method.cpp b/src/utils/alias_method.cpp index 2a04cfcedeaf..aec77e0afa81 100644 --- a/src/utils/alias_method.cpp +++ b/src/utils/alias_method.cpp @@ -18,6 +18,7 @@ #include #include +#include namespace parthenon { namespace AliasMethod { diff --git a/src/utils/error_checking.cpp b/src/utils/error_checking.cpp index 7c071a398475..2198f4d8b47c 100644 --- a/src/utils/error_checking.cpp +++ b/src/utils/error_checking.cpp @@ -15,6 +15,8 @@ // the public, perform publicly and display publicly, and to permit others to do so. //======================================================================================== +#include + #include "error_checking.hpp" #ifdef MPI_PARALLEL diff --git a/src/utils/error_checking.hpp b/src/utils/error_checking.hpp index f38dba9e1dee..eec8dd5b7870 100644 --- a/src/utils/error_checking.hpp +++ b/src/utils/error_checking.hpp @@ -19,6 +19,7 @@ //! 
\file error_checking.hpp // \brief utility macros for error checking +#include #include #include #include diff --git a/src/utils/object_pool.hpp b/src/utils/object_pool.hpp index c7452499f126..2140c193feec 100644 --- a/src/utils/object_pool.hpp +++ b/src/utils/object_pool.hpp @@ -179,8 +179,9 @@ class ObjectPool::owner_t : public ObjectPool::weak_t { KOKKOS_FUNCTION ~owner_t() noexcept { - KOKKOS_IF_ON_HOST( - if (weak_t::pool_ != nullptr) { (*weak_t::pool_).ReferenceCountedFree(*this); }) + KOKKOS_IF_ON_HOST(if (weak_t::pool_ != nullptr) { + (*weak_t::pool_).ReferenceCountedFree(*this); + }) // NOLINT } // Warning, the move constructors are messed up and don't copy over the weak_t diff --git a/src/utils/string_utils.cpp b/src/utils/string_utils.cpp index e6c2891df5c8..4ce088238e71 100644 --- a/src/utils/string_utils.cpp +++ b/src/utils/string_utils.cpp @@ -14,6 +14,8 @@ #include "string_utils.hpp" #include +#include +#include #include "error_checking.hpp" diff --git a/tst/unit/kokkos_abstraction.cpp b/tst/unit/kokkos_abstraction.cpp index ae0e3fcb79e8..525877375121 100644 --- a/tst/unit/kokkos_abstraction.cpp +++ b/tst/unit/kokkos_abstraction.cpp @@ -18,6 +18,7 @@ //======================================================================================== #include +#include #include #include diff --git a/tst/unit/test_concepts_lite.cpp b/tst/unit/test_concepts_lite.cpp index 52aba0a1d3e2..d58fbbe34c5b 100644 --- a/tst/unit/test_concepts_lite.cpp +++ b/tst/unit/test_concepts_lite.cpp @@ -11,6 +11,7 @@ // the public, perform publicly and display publicly, and to permit others to do so. //======================================================================================== +#include #include #include diff --git a/tst/unit/test_forest.cpp b/tst/unit/test_forest.cpp index 32cf343affa4..2bf0a697c030 100644 --- a/tst/unit/test_forest.cpp +++ b/tst/unit/test_forest.cpp @@ -15,8 +15,12 @@ // the public, perform publicly and display publicly, and to permit others to do so. 
//======================================================================================== +#include #include +#include #include +#include +#include #include diff --git a/tst/unit/test_index_split.cpp b/tst/unit/test_index_split.cpp index c060f19df325..ff62008dae6f 100644 --- a/tst/unit/test_index_split.cpp +++ b/tst/unit/test_index_split.cpp @@ -12,6 +12,7 @@ //======================================================================================== #include #include +#include #include #include diff --git a/tst/unit/test_logical_location.cpp b/tst/unit/test_logical_location.cpp index b1b46bf98afc..8b4507d332bd 100644 --- a/tst/unit/test_logical_location.cpp +++ b/tst/unit/test_logical_location.cpp @@ -17,7 +17,12 @@ #include #include +#include #include +#include +#include +#include +#include #include diff --git a/tst/unit/test_meshblock_data_iterator.cpp b/tst/unit/test_meshblock_data_iterator.cpp index 16409b2ef678..d7b1ee709eed 100644 --- a/tst/unit/test_meshblock_data_iterator.cpp +++ b/tst/unit/test_meshblock_data_iterator.cpp @@ -18,7 +18,9 @@ #include #include #include +#include #include +#include #include #include diff --git a/tst/unit/test_metadata.cpp b/tst/unit/test_metadata.cpp index b556e9f763e0..16dcdcf0469f 100644 --- a/tst/unit/test_metadata.cpp +++ b/tst/unit/test_metadata.cpp @@ -11,6 +11,9 @@ // the public, perform publicly and display publicly, and to permit others to do so. 
//======================================================================================== +#include +#include + #include #include "basic_types.hpp" diff --git a/tst/unit/test_sparse_pack.cpp b/tst/unit/test_sparse_pack.cpp index 33ffd70e9bb5..30a81fba0e94 100644 --- a/tst/unit/test_sparse_pack.cpp +++ b/tst/unit/test_sparse_pack.cpp @@ -12,6 +12,7 @@ //======================================================================================== #include #include +#include #include #include diff --git a/tst/unit/test_unit_domain.cpp b/tst/unit/test_unit_domain.cpp index 06662b795657..36508aca703f 100644 --- a/tst/unit/test_unit_domain.cpp +++ b/tst/unit/test_unit_domain.cpp @@ -17,6 +17,7 @@ #include #include +#include #include "mesh/domain.hpp" diff --git a/tst/unit/test_unit_sort.cpp b/tst/unit/test_unit_sort.cpp index a8489832438f..eccc6cb6c022 100644 --- a/tst/unit/test_unit_sort.cpp +++ b/tst/unit/test_unit_sort.cpp @@ -15,6 +15,7 @@ // the public, perform publicly and display publicly, and to permit others to do so. 
//======================================================================================== +#include #include #include diff --git a/tst/unit/test_upper_bound.cpp b/tst/unit/test_upper_bound.cpp index 4bd3eef66471..975520b24360 100644 --- a/tst/unit/test_upper_bound.cpp +++ b/tst/unit/test_upper_bound.cpp @@ -17,6 +17,7 @@ #include #include +#include #include From 7cf8acb780ad5ef2e0af2ec87833a0ee43cb62c0 Mon Sep 17 00:00:00 2001 From: Luke Roberts Date: Wed, 18 Sep 2024 09:06:47 -0600 Subject: [PATCH 34/37] Fix pointer issue when calculating initial residual in task list --- src/solvers/bicgstab_solver.hpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/solvers/bicgstab_solver.hpp b/src/solvers/bicgstab_solver.hpp index 8632d0a68a67..dcc74165451e 100644 --- a/src/solvers/bicgstab_solver.hpp +++ b/src/solvers/bicgstab_solver.hpp @@ -145,18 +145,18 @@ class BiCGSTABSolver { tl.AddTask( TaskQualifier::once_per_region, initialize, "print to screen", [&](BiCGSTABSolver *solver, std::shared_ptr res_tol, - bool relative_residual) { + bool relative_residual, Mesh *pm) { if (Globals::my_rank == 0 && params_.print_per_step) { Real tol = relative_residual - ? *res_tol * std::sqrt(solver->rhs2.val / pmesh->GetTotalCells()) + ? 
*res_tol * std::sqrt(solver->rhs2.val / pm->GetTotalCells()) : *res_tol; printf("# [0] v-cycle\n# [1] rms-residual (tol = %e) \n# [2] rms-error\n", tol); } return TaskStatus::complete; }, - this, params_.residual_tolerance, params_.relative_residual); + this, params_.residual_tolerance, params_.relative_residual, pmesh); // BEGIN ITERATIVE TASKS auto [itl, solver_id] = tl.AddSublist(initialize, {1, params_.max_iters}); From d3609881a56ac5612f8a93e4aee6d1729c998512 Mon Sep 17 00:00:00 2001 From: Luke Roberts Date: Wed, 18 Sep 2024 09:15:21 -0600 Subject: [PATCH 35/37] changelog, format, lint --- CHANGELOG.md | 1 + src/solvers/bicgstab_solver.hpp | 11 +++++------ 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c142aa8a7e03..19ca04abc04d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ - [[PR 1172]](https://github.com/parthenon-hpc-lab/parthenon/pull/1172) Make parthenon manager robust against external MPI init and finalize calls ### Fixed (not changing behavior/API/variables/...) +- [[PR 1178]](https://github.com/parthenon-hpc-lab/parthenon/pull/1178) Fix issue with mesh pointer when using relative residual tolerance in BiCGSTAB solver. - [[PR1173]](https://github.com/parthenon-hpc-lab/parthenon/pull/1173) Make debugging easier by making parthenon throw an error if ParameterInput is different on multiple MPI ranks. 
### Infrastructure (changes irrelevant to downstream codes) diff --git a/src/solvers/bicgstab_solver.hpp b/src/solvers/bicgstab_solver.hpp index dcc74165451e..3d7d3f604c6f 100644 --- a/src/solvers/bicgstab_solver.hpp +++ b/src/solvers/bicgstab_solver.hpp @@ -144,13 +144,12 @@ class BiCGSTABSolver { this); tl.AddTask( TaskQualifier::once_per_region, initialize, "print to screen", - [&](BiCGSTABSolver *solver, std::shared_ptr res_tol, - bool relative_residual, Mesh *pm) { + [&](BiCGSTABSolver *solver, std::shared_ptr res_tol, bool relative_residual, + Mesh *pm) { if (Globals::my_rank == 0 && params_.print_per_step) { - Real tol = - relative_residual - ? *res_tol * std::sqrt(solver->rhs2.val / pm->GetTotalCells()) - : *res_tol; + Real tol = relative_residual + ? *res_tol * std::sqrt(solver->rhs2.val / pm->GetTotalCells()) + : *res_tol; printf("# [0] v-cycle\n# [1] rms-residual (tol = %e) \n# [2] rms-error\n", tol); } From 3de592d58820d965298326cb0f476b7eb3051e52 Mon Sep 17 00:00:00 2001 From: Ben Ryan Date: Wed, 25 Sep 2024 17:43:20 -0600 Subject: [PATCH 36/37] Make a global variable for whether simulation is a restart (#1179) * Set is_restart * Add soft equivalence * soft_equiv used for testing * CHANGELOG, test_kokkos_abstraction * copyright * Oops wrong var * Oops comparing error and reference * Update src/utils/robust.hpp Co-authored-by: Jonah Miller * style * naming * Make code more self-documenting --------- Co-authored-by: Jonah Miller --- CHANGELOG.md | 1 + src/globals.cpp | 7 ++++--- src/globals.hpp | 3 ++- src/parthenon_manager.cpp | 2 ++ src/utils/robust.hpp | 12 ++++++++++++ tst/unit/CMakeLists.txt | 2 +- ...raction.cpp => test_kokkos_abstraction.cpp} | 18 ++++++++++-------- 7 files changed, 32 insertions(+), 13 deletions(-) rename tst/unit/{kokkos_abstraction.cpp => test_kokkos_abstraction.cpp} (98%) diff --git a/CHANGELOG.md b/CHANGELOG.md index 19ca04abc04d..a7373b19036b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,7 @@ ## Current develop ### 
Added (new features/APIs/variables/...) +- [[PR 1179]](https://github.com/parthenon-hpc-lab/parthenon/pull/1179) Make a global variable for whether simulation is a restart - [[PR 1171]](https://github.com/parthenon-hpc-lab/parthenon/pull/1171) Add PARTHENON_USE_SYSTEM_PACKAGES build option - [[PR 1161]](https://github.com/parthenon-hpc-lab/parthenon/pull/1161) Make flux field Metadata accessible, add Metadata::CellMemAligned flag, small perfomance upgrades diff --git a/src/globals.cpp b/src/globals.cpp index 139a01fbddf6..b00db585a3fe 100644 --- a/src/globals.cpp +++ b/src/globals.cpp @@ -3,7 +3,7 @@ // Copyright(C) 2014 James M. Stone and other code contributors // Licensed under the 3-clause BSD License, see LICENSE file for details //======================================================================================== -// (C) (or copyright) 2020-2021. Triad National Security, LLC. All rights reserved. +// (C) (or copyright) 2020-2024. Triad National Security, LLC. All rights reserved. // // This program was produced under U.S. Government contract 89233218CNA000001 for Los // Alamos National Laboratory (LANL), which is operated by Triad National Security, LLC @@ -30,8 +30,9 @@ namespace Globals { int nghost; // all of these global variables are set at the start of main(): -int my_rank; // MPI rank of this process -int nranks; // total number of MPI ranks +int my_rank; // MPI rank of this process +int nranks; // total number of MPI ranks +bool is_restart; // Whether this simulation is restarted from a checkpoint file // sparse configuration values that are needed in various places SparseConfig sparse_config; diff --git a/src/globals.hpp b/src/globals.hpp index 870a0803d8ce..539c158d6570 100644 --- a/src/globals.hpp +++ b/src/globals.hpp @@ -3,7 +3,7 @@ // Copyright(C) 2014 James M. 
Stone and other code contributors // Licensed under the 3-clause BSD License, see LICENSE file for details //======================================================================================== -// (C) (or copyright) 2020-2021. Triad National Security, LLC. All rights reserved. +// (C) (or copyright) 2020-2024. Triad National Security, LLC. All rights reserved. // // This program was produced under U.S. Government contract 89233218CNA000001 for Los // Alamos National Laboratory (LANL), which is operated by Triad National Security, LLC @@ -36,6 +36,7 @@ struct SparseConfig { }; extern int my_rank, nranks, nghost; +extern bool is_restart; extern SparseConfig sparse_config; diff --git a/src/parthenon_manager.cpp b/src/parthenon_manager.cpp index b0ec6ac971ae..26b5909e8775 100644 --- a/src/parthenon_manager.cpp +++ b/src/parthenon_manager.cpp @@ -70,6 +70,8 @@ ParthenonStatus ParthenonManager::ParthenonInitEnv(int argc, char *argv[]) { Globals::nranks = 1; #endif // MPI_PARALLEL + Globals::is_restart = IsRestart(); + Kokkos::initialize(argc, argv); // pgrete: This is a hack to disable allocation tracking until the Kokkos diff --git a/src/utils/robust.hpp b/src/utils/robust.hpp index 1714d9359b17..ca205db2430c 100644 --- a/src/utils/robust.hpp +++ b/src/utils/robust.hpp @@ -60,6 +60,18 @@ KOKKOS_INLINE_FUNCTION auto ratio(const A &a, const B &b) { const B sgn = b >= 0 ? 1 : -1; return a / (b + sgn * SMALL()); } + +// Return true equivalence if value and reference differ by less than precision +// Optionally return true if reference value is close to zero +KOKKOS_FORCEINLINE_FUNCTION +bool SoftEquiv(const Real &val, const Real &ref, + const Real eps = 10. 
* std::numeric_limits::epsilon(), + const bool pass_on_small = true) { + const bool is_close = std::abs(val - ref) < eps * std::abs(ref); + const bool is_small = std::abs(ref) < std::numeric_limits::min(); + return (is_close || (is_small && pass_on_small)); +} + } // namespace robust } // namespace parthenon #endif // UTILS_ROBUST_HPP_ diff --git a/tst/unit/CMakeLists.txt b/tst/unit/CMakeLists.txt index c892572bdde9..7d36ebb2be49 100644 --- a/tst/unit/CMakeLists.txt +++ b/tst/unit/CMakeLists.txt @@ -25,7 +25,7 @@ list(APPEND unit_tests_SOURCES test_unit_constants.cpp test_unit_domain.cpp test_unit_sort.cpp - kokkos_abstraction.cpp + test_kokkos_abstraction.cpp test_index_split.cpp test_logical_location.cpp test_forest.cpp diff --git a/tst/unit/kokkos_abstraction.cpp b/tst/unit/test_kokkos_abstraction.cpp similarity index 98% rename from tst/unit/kokkos_abstraction.cpp rename to tst/unit/test_kokkos_abstraction.cpp index 525877375121..767e40f38315 100644 --- a/tst/unit/kokkos_abstraction.cpp +++ b/tst/unit/test_kokkos_abstraction.cpp @@ -26,12 +26,14 @@ #include "basic_types.hpp" #include "kokkos_abstraction.hpp" +#include "utils/robust.hpp" using parthenon::DevExecSpace; using parthenon::ParArray1D; using parthenon::ParArray2D; using parthenon::ParArray3D; using parthenon::ParArray4D; +using parthenon::robust::SoftEquiv; using Real = double; template @@ -316,7 +318,6 @@ bool test_wrapper_nested_3d(OuterLoopPattern outer_loop_pattern, // Copy array back from device to host Kokkos::deep_copy(host_du, dev_du); - Real max_rel_err = -1; const Real rel_tol = std::numeric_limits::epsilon(); // compare data on the host @@ -324,14 +325,15 @@ bool test_wrapper_nested_3d(OuterLoopPattern outer_loop_pattern, for (int j = 0; j < N; j++) { for (int i = 1; i < N - 1; i++) { const Real analytic = 2.0 * (i + 1) * pow((j + 2) * (k + 3), 2.0); - const Real err = host_du(k, j, i - 1) - analytic; - max_rel_err = fmax(fabs(err / analytic), max_rel_err); + if (!SoftEquiv(host_du(k, j, i 
- 1), analytic, rel_tol)) { + return false; + } } } } - return max_rel_err < rel_tol; + return true; } template @@ -385,7 +387,6 @@ bool test_wrapper_nested_4d(OuterLoopPattern outer_loop_pattern, // Copy array back from device to host Kokkos::deep_copy(host_du, dev_du); - Real max_rel_err = -1; const Real rel_tol = std::numeric_limits::epsilon(); // compare data on the host @@ -394,15 +395,16 @@ bool test_wrapper_nested_4d(OuterLoopPattern outer_loop_pattern, for (int j = 0; j < N; j++) { for (int i = 1; i < N - 1; i++) { const Real analytic = 2.0 * (i + 1) * pow((j + 2) * (k + 3) * (n + 4), 2.0); - const Real err = host_du(n, k, j, i - 1) - analytic; - max_rel_err = fmax(fabs(err / analytic), max_rel_err); + if (!SoftEquiv(host_du(n, k, j, i - 1), analytic, rel_tol)) { + return false; + } } } } } - return max_rel_err < rel_tol; + return true; } TEST_CASE("nested par_for loops", "[wrapper]") { From 3283635ad1d509d78723e3412beede61eb466cb0 Mon Sep 17 00:00:00 2001 From: Philipp Grete Date: Thu, 26 Sep 2024 17:40:33 +0200 Subject: [PATCH 37/37] Change delim. Is this stupid? --- src/outputs/parthenon_opmd.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/outputs/parthenon_opmd.hpp b/src/outputs/parthenon_opmd.hpp index 2c7035dd13e9..c67c7366a1a8 100644 --- a/src/outputs/parthenon_opmd.hpp +++ b/src/outputs/parthenon_opmd.hpp @@ -25,7 +25,7 @@ namespace OpenPMDUtils { // access to non-standard groups (such as "Params" versus the standard "meshes"). // TODO(pgrete & reviewer) (agree on delim and add check for package name and keys) OR // better use of opmd-api -inline static const std::string delim = "+"; +inline static const std::string delim = "🤝"; // Construct OpenPMD Mesh "record" name and comonnent identifier. // - comp_idx is a flattended index over all components of the vectors and tensors, i.e.,