Skip to content

Commit

Permalink
fix: Correct indentation, so that CodeQL can work with the code (#11166)
Browse files Browse the repository at this point in the history
  • Loading branch information
hangy authored Dec 22, 2024
1 parent 7c0df2d commit 0178ac2
Show file tree
Hide file tree
Showing 28 changed files with 778 additions and 601 deletions.
110 changes: 57 additions & 53 deletions packager-codes/get_packager_code_from_html_ireland.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,114 +8,118 @@

# In[]:

import pandas as pd

# Source pages for Irish approval codes.  The FSAI pages share one HTML
# table layout; the SFPA pages use a second layout handled separately below.
urls = ['https://oapi.fsai.ie/LAApprovedEstablishments.aspx',
        'https://oapi.fsai.ie/AuthReg99901Establishments.aspx',
        'https://oapi.fsai.ie/HSEApprovedEstablishments.aspx'
        ]
urls_second_format = ['https://www.sfpa.ie/Seafood-Safety/Registration-Approval-of-Businesses/List-of-Approved-Establishments-and-Vessels/Approved-Establishments',
                      'https://www.sfpa.ie/Seafood-Safety/Registration-Approval-of-Businesses/Approved-Freezer-Vessels'
                      ]

# Output file for the concatenated result.
csv_file = 'Ireland_concatenated.csv'

# Each entry is the list of tables pandas found on the corresponding page.
pages = [pd.read_html(url) for url in urls]
pages2 = [pd.read_html(url) for url in urls_second_format]


# In[]:

def ireland_correction_of_1_dataframe(df):  # Version to get anything
    """Normalise one table scraped from the Irish approval pages.

    Promotes the first row to column headers and drops it, then repairs
    rows that contained merged cells on the web page (merged cells show up
    as nulls in pandas) by combining the row's non-null leading cells with
    the previous complete row.

    Returns the corrected DataFrame, or an empty DataFrame when the table
    has no 'Approval_Number' column.
    """
    df.columns = df.iloc[[0]].values.tolist()
    df = df.rename(columns={' Address': 'Address'})
    df = df.drop(df.index[0])
    row_reference = df.iloc[0]

    if 'Approval_Number' not in df.columns:
        print("this table has no approval number and was not added")
        return pd.DataFrame()

    df_is_null = df.isnull()
    for i in range(1, len(df)):
        # We assume that on a row, there is no merged cell (null in pandas)
        # on the webpage after an unmerged cell (not null).
        if df_is_null.iloc[i, len(df.columns)-1]:
            row_retrieved = []
            j = 0
            while not df_is_null.iloc[i, j]:
                row_retrieved.append(df.iloc[i, j])
                j += 1
            # NOTE(review): the non-null leading cells are written into the
            # trailing slots of the previous complete row — presumably the
            # merged cells shift values left on the page; confirm on the site.
            row = row_reference.copy()
            row[len(row)-len(row_retrieved):len(row)] = row_retrieved
            df.iloc[i] = row

        row_reference = df.iloc[i]

    df["Address"] = df["Address"].apply(add_space_before_uppercase)

    return df


# Example usage:
# df = pages[0][18]
# ireland_correction_of_1_dataframe(df)


# In[]:

def add_space_before_uppercase(words):
result=""
for s in words:
if isinstance(s, str):
if s.isupper():
result+=" "
result+=s
return result
result = ""
for s in words:
if isinstance(s, str):
if s.isupper():
result += " "
result += s
return result


""" This could have been done more efficienty using Regex r"[a-z][A-Z]"" and avoid r" [A-Z]". But google maps recognize it this way."""


# In[ ]:

# Accumulate every usable table from the FSAI pages into one DataFrame.
df = pd.DataFrame()

for i, page in enumerate(pages):
    for table in page:
        # pd.concat replaces DataFrame.append, removed in pandas 2.0.
        df = pd.concat([df, ireland_correction_of_1_dataframe(table)],
                       ignore_index=True)
    print("page " + str(i) + " is done")
print("finished for all in urls!")


# In[]:

# Second page format (SFPA): drop the first table row, then label the
# approval-number column so the common correction routine keeps the table.
for i, page2 in enumerate(pages2):
    for j, table in enumerate(page2):
        table = table.drop(table.index[0])
        table.loc[0, 0] = 'Approval_Number'
        # pd.concat replaces DataFrame.append, removed in pandas 2.0.
        df = pd.concat([df, ireland_correction_of_1_dataframe(table)],
                       ignore_index=True)
        print("table " + str(j) + " is ok")
    print("page " + str(i) + " is done")
print("finished for table in urls_second_format!")


# In[]:


# Persist the concatenated table of approval codes.
df.to_csv(csv_file, index=False)
18 changes: 11 additions & 7 deletions scripts/generate_dump_for_offline_apps.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import os
import pandas


def main():
if not (os.getenv('OFF_PUBLIC_DATA_DIR') and os.getenv('PRODUCT_OPENER_FLAVOR') and os.getenv('PRODUCT_OPENER_FLAVOR_SHORT')):
print("Environment variables OFF_PUBLIC_DATA_DIR, PRODUCT_OPENER_FLAVOR and PRODUCT_OPENER_FLAVOR_SHORT are required")
Expand All @@ -13,15 +14,18 @@ def main():

if not os.path.exists(off_public_data_dir + '/offline'):
os.makedirs(off_public_data_dir + '/offline')

df = pandas.read_csv(off_public_data_dir + '/en.' + product_opener_flavor + '.org.products.csv', sep='\t', low_memory=False)
colnames = ['code','product_name','quantity','brands']

df = pandas.read_csv(off_public_data_dir + '/en.' + product_opener_flavor +
'.org.products.csv', sep='\t', low_memory=False)
colnames = ['code', 'product_name', 'quantity', 'brands']
# add 'nutriscore_grade','nova_group','environmental_score_grade' columns if the flavor is off
if product_opener_flavor_short == 'off':
colnames = colnames + ['nutriscore_grade','nova_group','environmental_score_grade']
colnames = colnames + ['nutriscore_grade',
'nova_group', 'environmental_score_grade']

df.rename(columns={'nutriscore_grade': 'nutrition_grade_fr'}).to_csv(off_public_data_dir + '/offline/en.' +
product_opener_flavor + '.org.products.small.csv', columns=colnames, sep='\t', index=False)


df.rename(columns={'nutriscore_grade': 'nutrition_grade_fr'}).to_csv(off_public_data_dir + '/offline/en.' + product_opener_flavor + '.org.products.small.csv', columns = colnames,sep='\t',index=False)

if __name__ == '__main__':
main()

96 changes: 51 additions & 45 deletions scripts/mappingGES.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,75 +14,81 @@

# Sanity checks: refuse to run if the temporary file already exists or if
# either input file is missing.  Prints are parenthesised (single-argument
# form behaves identically under Python 2 and parses under Python 3).
temporary_exists = os.path.isfile(PATH_TO_TEMPORARY)
if temporary_exists:
    print("The temporary file already exists")
    exit()

ingredients_exists = os.path.isfile(PATH_TO_INGREDIENTS)
if not ingredients_exists:
    print("The ingredient file does not exist, check the path :" + PATH_TO_INGREDIENTS)
    exit()

foodGES_exists = os.path.isfile(PATH_TO_FOODGES)
if not foodGES_exists:
    print("The foodGES file does not exist, check the path :" + PATH_TO_FOODGES)
    exit()


def check_next_lines(ingredients):
    """Read lines from the *ingredients* file handle until one is found
    that contains neither the foodGES value marker nor the foodGES
    ingredient marker.

    Returns every line read, including the terminating non-foodGES line.
    """
    keep_lines = []
    while True:
        next_line = ingredients.readline()
        keep_lines.append(next_line)
        if (STRING_FOODGES_VALUE not in next_line
                and STRING_FOODGES_INGREDIENT not in next_line):
            return keep_lines


def write_next_lines(next_lines, temporary_file):
    """Write *next_lines* (except the last) to *temporary_file*, inserting
    the mapped foodGES value line after every ingredient line with a known
    mapping, then write the terminating line unchanged.

    Reads the module-level mapping ``dict`` (ingredient -> foodGES value;
    the name shadows the builtin but is defined elsewhere in this script)
    and prunes ``unused_mappings`` as mappings get used.
    """
    size = len(next_lines)
    for i in range(0, size-1):
        line = next_lines[i]
        if STRING_FOODGES_INGREDIENT in line:
            temporary_file.write(line)
            key = line.rstrip("\n")
            if key not in dict:
                print("this mapping is not known : " + key)
            else:
                temporary_file.write(STRING_FOODGES_VALUE + dict.get(key) + "\n")
                if key in unused_mappings:
                    unused_mappings.remove(key)
    temporary_file.write(next_lines[size-1])


# Build the ingredient -> foodGES value mapping from the CSV.
# NOTE(review): the mapping lives in a global named ``dict`` (shadows the
# builtin) because the rest of the script reads it under that name.
with open(PATH_TO_FOODGES, 'r') as csvFile:
    reader = csv.reader(csvFile)
    for row in reader:
        dict[row[2]] = row[1]
        unused_mappings.append(row[2])
# The ``with`` block closes csvFile; no explicit close() needed.

temporary_file = open(PATH_TO_TEMPORARY, "w+")
ingredients = open(PATH_TO_INGREDIENTS)

# Copy the ingredients file line by line, inserting the mapped foodGES
# value line after each known ingredient marker.
while True:
    line = ingredients.readline()
    temporary_file.write(line)
    if not line:
        break
    if STRING_FOODGES_INGREDIENT in line:
        key = line.rstrip("\n")
        if key not in dict:
            print("this mapping is not known : " + key)
        else:
            temporary_file.write(STRING_FOODGES_VALUE + key_value_line(dict, key))
            if key in unused_mappings:
                unused_mappings.remove(key)
        # Consume the foodGES lines that follow this ingredient and
        # rewrite them with up-to-date values.
        next_lines = check_next_lines(ingredients)
        write_next_lines(next_lines, temporary_file)

ingredients.close()
temporary_file.close()

# Atomically-ish replace the original file with the rewritten one.
os.remove(PATH_TO_INGREDIENTS)
os.rename(PATH_TO_TEMPORARY, PATH_TO_INGREDIENTS)

print("\n")
print("This is the list of unused mapping : ")
for mapping in unused_mappings:
    print(mapping)


def key_value_line(mapping, key):
    # One-line helper: the mapped value terminated by a newline.
    return mapping.get(key) + "\n"
Loading

0 comments on commit 0178ac2

Please sign in to comment.