Skip to content

Commit

Permalink
Merge pull request #815 from googlefonts/tag-fix
Browse files (browse the repository at this point in the history)
tags: fix _parse_csv logic
  • Loading branch information
m4rc1e authored Jan 26, 2024
2 parents 3ac815f + 2ca718d commit e4bdb2d
Showing 1 changed file with 101 additions and 134 deletions.
235 changes: 101 additions & 134 deletions Lib/gftools/tags.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -136,60 +136,24 @@ def _parse_csv(self):
{"Family": str, "Group/Tag": str, "Weight": int},
...
]"""
columns = []
res = []
seen_group_result = set()
for row_idx, row in enumerate(self.data):
if row_idx == 1:
columns = row
# Some rows have been used as padding so skip them.
if row_idx < 4:
continue
for col_idx, cell in enumerate(row):
# Doc also contains columns used for padding... meh!
if cell == "" or columns[col_idx] == "":
# rows < 4 are column headers and padding
for i in range(4, len(self.data)):
# columns < 9 are personal quality scores, filepaths, imgs and padding
for j in range(9, len(self.data[i])):
if not self.data[i][j].isnumeric():
continue
# Group names are on row 0 and tags are on row 1. To find a
# tag's group name, we iterate backwards on row 0 until we
# hit a value e.g:
# Sans, , ,Serif,
# ,Humanist,Grotesk, ,Garalde,Didone
#
# ["Sans/Humanist", "Sans/Grotesk", "Serif/Garalde", "Serif/Didone"]
group_idx, group = next(
(i, self.data[0][i])
for i in range(col_idx, 0, -1)
if self.data[0][i] != ""
)
# Some groups such as Sans and Serif also have values for each family e.g
# Sans <--- Both families have values for Sans
# Modern Traditional
# Abel 20 30
# Wave 90 90
if group not in seen_group_result and self.data[1][group_idx] == "":
if self.data[row_idx][group_idx] == "":
continue
res.append(
{
"Family": row[0],
"Group/Tag": f"/{group}/{group}",
"Weight": int(self.data[row_idx][group_idx]),
}
)
seen_group_result.add(group)
if group not in self.CATEGORIES:
raise ValueError(
f"{group} isn't a know category, {self.CATEGORIES.keys()}"
)

tag = columns[col_idx]
if tag not in self.CATEGORIES[group]:
raise ValueError(f"{tag} isn't in {self.CATEGORIES[group]}")
family = self.data[i][0]
value = int(self.data[i][j])
group = self.data[0][j]
# If no tag exists for a value, the value has been assigned to the
# whole group, such as Sans, Sans Serif, etc.
tag = self.data[1][j] or group
res.append(
{
"Family": row[0],
"Family": family,
"Group/Tag": f"/{group}/{tag}",
"Weight": int(cell),
"Weight": value,
}
)
res.sort(key=lambda k: (k["Family"], k["Group/Tag"]))
Expand All @@ -210,7 +174,8 @@ def check_structure(self):
# isn't tabular. However, using a Google Sheet does mean we can all
# edit the data collaboratively and it does mean users don't need to
# know git or install other tools.
# Please don't cry about all the empty columns below ;-).
# Please don't cry about all the empty columns below ;-). They're
# mainly used as whitespace in the spreadsheet
columns_0 = [
"Family",
"Family Dir",
Expand All @@ -222,112 +187,112 @@ def check_structure(self):
"UT's Quality Score",
" Type \n Categories",
"Serif",
"",
"",
"",
"",
"",
"",
"",
"Serif",
"Serif",
"Serif",
"Serif",
"Serif",
"Serif",
"Serif",
"",
"Sans",
"",
"",
"",
"",
"",
"",
"",
"Sans",
"Sans",
"Sans",
"Sans",
"Sans",
"Sans",
"Sans",
"",
"Slab",
"",
"",
"",
"Slab",
"Slab",
"Slab",
"",
"Script",
"",
"",
"",
"",
"Script",
"Script",
"Script",
"Script",
"",
"Display",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"Display",
"Display",
"Display",
"Display",
"Display",
"Display",
"Display",
"Display",
"Display",
"Display",
"Display",
"Display",
"Display",
"Display",
"",
"Arabic",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"Arabic",
"Arabic",
"Arabic",
"Arabic",
"Arabic",
"Arabic",
"Arabic",
"Arabic",
"Arabic",
"Arabic",
"",
"Hebrew",
"",
"",
"",
"",
"Hebrew",
"Hebrew",
"Hebrew",
"Hebrew",
"",
"South East Asian (Thai, Khmer, Lao)",
"",
"",
"",
"",
"South East Asian (Thai, Khmer, Lao)",
"South East Asian (Thai, Khmer, Lao)",
"South East Asian (Thai, Khmer, Lao)",
"South East Asian (Thai, Khmer, Lao)",
"",
"Sinhala",
"",
"",
"",
"Sinhala",
"Sinhala",
"Sinhala",
"",
"Indic",
"",
"",
"",
"",
"",
"Indic",
"Indic",
"Indic",
"Indic",
"Indic",
" Expressive\n Categories",
"Simplicity",
"",
"",
"",
"",
"",
"",
"",
"Simplicity",
"Simplicity",
"Simplicity",
"Simplicity",
"Simplicity",
"Simplicity",
"Simplicity",
"Youthful",
"Youthful",
"Youthful",
"Youthful",
"Youthful",
"Youthful",
"Youthful",
"Youthful",
"Youthful",
"",
"",
"",
"",
"",
"",
"",
"",
"Flow",
"",
"",
"",
"",
"",
"",
"",
"",
"Flow",
"Flow",
"Flow",
"Flow",
"Flow",
"Flow",
"Flow",
"Flow",
]
columns_1 = [
"",
Expand Down Expand Up @@ -463,6 +428,8 @@ def check_structure(self):
test_tags = [
# row 0
{"Family": "ABeeZee", "Group/Tag": "/Sans/Geometric", "Weight": 10},
# row 131
{"Family": "Akaya Kanadaka", "Group/Tag": "/Serif/Serif", "Weight": 10},
# row 330
{"Family": "Bonbon", "Group/Tag": "/Script/Handwritten", "Weight": 100},
# row 577
Expand Down

0 comments on commit e4bdb2d

Please sign in to comment.