Skip to content

Commit

Permalink
Merge pull request #198 from mapswipe/dev
Browse files Browse the repository at this point in the history
Generate better geojson output and centroids
  • Loading branch information
Hagellach37 authored Sep 30, 2019
2 parents fda100a + 9acfbc8 commit 4b1b976
Showing 1 changed file with 134 additions and 4 deletions.
138 changes: 134 additions & 4 deletions mapswipe_workers/mapswipe_workers/generate_stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from psycopg2 import sql
import dateutil
import dateutil.parser
import json

from mapswipe_workers import auth
from mapswipe_workers.definitions import logger
Expand Down Expand Up @@ -46,6 +47,10 @@ def generate_stats(only_new_results):
get_aggregated_results_by_project_id_geom(filename)
csv_to_geojson(filename)

filename = f'{DATA_PATH}/api-data/agg_res_by_project_id_centroid.csv'
get_aggregated_results_by_project_id_centroid(filename)
csv_to_geojson(filename)

filename = f'{DATA_PATH}/api-data/agg_projects.csv'
get_aggregated_projects(filename)

Expand All @@ -62,6 +67,10 @@ def generate_stats(only_new_results):
get_aggregated_progress_by_project_id_geom(filename)
csv_to_geojson(filename)

filename = f'{DATA_PATH}/api-data/agg_progress_by_project_id_centroid.csv'
get_aggregated_progress_by_project_id_centroid(filename)
csv_to_geojson(filename)

logger.info('start to export csv file for %s projects based on given project_id_list' % len(project_id_list))
for project_id in project_id_list:
filename = f'{DATA_PATH}/api-data/agg_res_by_task_id/agg_res_by_task_id_{project_id}.csv'
Expand Down Expand Up @@ -255,12 +264,44 @@ def get_aggregated_results_by_project_id_geom(filename):
sql_query = """COPY (
SELECT
r.*
,p.name
,p.project_details
,ST_AsText(p.geom) as geom
FROM
aggregated_results_by_project_id as r , projects as p
WHERE
r.project_id = p.project_id
) TO STDOUT WITH CSV HEADER"""
) TO STDOUT WITH (FORMAT CSV, HEADER, FORCE_QUOTE(project_id, name, project_details))"""

with open(filename, 'w') as f:
pg_db.copy_expert(sql_query, f)

del pg_db

logger.info('saved aggregated results by project_id to %s' % filename)


def get_aggregated_results_by_project_id_centroid(filename):
'''
Export results aggregated on project_id basis as csv file.
Parameters
----------
filename: str
'''

pg_db = auth.postgresDB()
sql_query = """COPY (
SELECT
r.*
,p.name
,p.project_details
,ST_AsText(ST_Centroid(p.geom)) as geom
FROM
aggregated_results_by_project_id as r , projects as p
WHERE
r.project_id = p.project_id
) TO STDOUT WITH (FORMAT CSV, HEADER, FORCE_QUOTE(project_id, name, project_details))"""

with open(filename, 'w') as f:
pg_db.copy_expert(sql_query, f)
Expand Down Expand Up @@ -382,20 +423,51 @@ def get_aggregated_progress_by_project_id_geom(filename):
filename: str
'''

# TODO: Export aggregated_progress_by_project_id_geom.csv as geojson

pg_db = auth.postgresDB()
sql_query = """
COPY (
SELECT
r.*
,p.name
,p.project_details
,ST_AsText(p.geom) as geom
FROM
aggregated_progress_by_project_id as r,
projects as p
WHERE
p.project_id = r.project_id
) TO STDOUT WITH CSV HEADER"""
) TO STDOUT WITH (FORMAT CSV, HEADER, FORCE_QUOTE(project_id, name, project_details))"""

with open(filename, 'w') as f:
pg_db.copy_expert(sql_query, f)

del pg_db
logger.info('saved aggregated progress by project_id to %s' % filename)


def get_aggregated_progress_by_project_id_centroid(filename):
'''
Export aggregated progress on a project_id basis as csv file.
Parameters
----------
filename: str
'''

pg_db = auth.postgresDB()
sql_query = """
COPY (
SELECT
r.*
,p.name
,p.project_details
,ST_AsText(ST_Centroid(p.geom)) as geom
FROM
aggregated_progress_by_project_id as r,
projects as p
WHERE
p.project_id = r.project_id
) TO STDOUT WITH (FORMAT CSV, HEADER, FORCE_QUOTE(project_id, name, project_details))"""

with open(filename, 'w') as f:
pg_db.copy_expert(sql_query, f)
Expand Down Expand Up @@ -546,3 +618,61 @@ def csv_to_geojson(filename):
f'SELECT *, CAST(geom as geometry) FROM "{filename_without_path}"'
], check=True)
logger.info(f'converted {filename} to {outfile}.')

cast_datatypes_for_geojson(outfile)


def csv_to_geojson_centroids(filename):
    '''
    Use ogr2ogr to convert a csv file to a GeoJSON file of centroids.

    The result is written next to the input as <name>_centroids.geojson and
    is afterwards post-processed by cast_datatypes_for_geojson.

    Parameters
    ----------
    filename: str
        path of the csv file; the geometry is read from its geom column

    Raises
    ------
    subprocess.CalledProcessError
        if ogr2ogr exits with a non-zero status (check=True)
    '''

    # Only strip a trailing '.csv'. A plain str.replace('.csv', ...) would
    # also rewrite '.csv' inside directory names, and for an input without
    # that suffix outfile would equal filename, so the input itself would be
    # removed below.
    if filename.endswith('.csv'):
        root = filename[:-len('.csv')]
    else:
        root = filename
    outfile = f'{root}_centroids.geojson'

    # need to remove file here because ogr2ogr can't overwrite when choosing GeoJSON
    if os.path.isfile(outfile):
        os.remove(outfile)

    # name used as the table in the -sql statement: file name without
    # directory and without the csv extension
    filename_without_path = os.path.basename(root)
    # TODO: remove geom column from normal attributes in sql query
    subprocess.run([
        "ogr2ogr",
        "-f",
        "GeoJSON",
        outfile,
        filename,
        "-sql",
        f'SELECT *, ST_Centroid(CAST(geom as geometry)) FROM "{filename_without_path}"'
    ], check=True)
    logger.info(f'converted {filename} to {outfile}.')

    cast_datatypes_for_geojson(outfile)


def cast_datatypes_for_geojson(filename):
    '''
    Cast the property values of a GeoJSON file to float where possible.

    The file is read, modified and written back in place:
    - project_id, name, project_details, task_id and group_id are kept as is
    - the redundant geom property is removed
    - every other value is cast to float; values that cannot be cast are
      left unchanged

    Parameters
    ----------
    filename: str
        path of the GeoJSON file; a path ending in .csv is mapped to the
        corresponding .geojson file
    '''

    # Only swap a trailing '.csv' extension. Replacing every 'csv' substring
    # would corrupt paths that contain 'csv' elsewhere (e.g. a directory name).
    if filename.endswith('.csv'):
        filename = filename[:-len('.csv')] + '.geojson'

    with open(filename) as f:
        geojson_data = json.load(f)

    # identifier and text columns which must never be cast
    keep_as_is = {'project_id', 'name', 'project_details', 'task_id', 'group_id'}

    # Iterating the features directly also handles an empty feature list,
    # which previously failed on features[0].
    for feature in geojson_data['features']:
        # "properties" may be null in GeoJSON; nothing to cast in that case
        properties = feature.get('properties') or {}

        # iterate over a copy of the keys because geom is deleted on the way
        for key in list(properties):
            if key in keep_as_is:
                continue

            if key == 'geom':
                # remove redundant geometry property
                del properties[key]
                continue

            try:
                properties[key] = float(properties[key])
            except (TypeError, ValueError, OverflowError):
                # not a number (e.g. None or free text): keep original value
                pass

    with open(filename, 'w') as f:
        json.dump(geojson_data, f)
    logger.info(f'converted datatypes for {filename}.')

0 comments on commit 4b1b976

Please sign in to comment.