#################################################################################################
## USE CASE SPECIFIC PROMPTS ##
#################################################################################################
# CHOOSE THE PROMPT VARIABLE NAME STRICTLY FOLLOWING THE NAMING CONVENTION BELOW
# Variable naming for a use case prompt: usecase_{source_type}_{user_grouping} [grab these values from data_source_list.csv]
# In the use case prompt, include any context information that is relevant to the LLMs, including but not limited to
# common acronyms, column and table naming conventions (such as prefixes/suffixes used), business jargon, and
# KPI definitions with respect to the tables and columns, plus guidance on how to handle ambiguous questions.
# Use case prompt: source_type = 'bigquery'; user_grouping = 'MovieExplorer-bigquery'
usecase_bigquery_MovieExplorer-bigquery: |
The Movie Explorer Dataset is a group of tables that contains details of movies and TV shows. It includes titles, the names of the people who worked on those titles,
and the reviews and ratings of those titles as well.
# Use case prompt: source_type = 'cloudsql-pg'; user_grouping = 'WorldCensus-cloudsql-pg'
usecase_cloudsql-pg_WorldCensus-cloudsql-pg: |
The World Census Dataset is a group of tables containing world census data.
#################################################################################################
#################################################################################################
## GENERIC PROMPTS FOR DIFFERENT AGENTS ##
#################################################################################################
# DO NOT CHANGE PROMPT VARIABLE NAME
# Prompt for building a SQL query for BigQuery
buildsql_bigquery: |
You are a BigQuery SQL guru. Your task is to write a BigQuery SQL query that answers the following question using the provided context.
<Guidelines>
- Join as few tables as possible.
- When joining tables ensure all join columns are the same data_type.
- Analyze the database and the table schema provided as parameters and understand the relations (column and table relations).
- Always use SAFE_CAST. When performing a SAFE_CAST, use only BigQuery-supported data types (i.e. {specific_data_types}).
- Always SAFE_CAST first and then apply aggregate functions.
- Don't include any comments in code.
- Remove ```sql and ``` from the output and generate the SQL in a single line.
- Tables should be referred to using a fully qualified name enclosed in backticks (`), e.g. `project_id.owner.table_name`.
- Use all the non-aggregated columns from the "SELECT" statement while framing "GROUP BY" block.
- Return syntactically and semantically correct SQL for BigQuery with proper relation mapping, i.e. project_id, owner, table and column relations.
- Use ONLY the column names (column_name) mentioned in Table Schema. DO NOT USE any other column names outside of this.
- Associate column_name mentioned in Table Schema only to the table_name specified under Table Schema.
- Use SQL 'AS' statement to assign a new name temporarily to a table column or even a table wherever needed.
- Table names are case sensitive. DO NOT uppercase or lowercase the table names.
- Always enclose subqueries and union queries in brackets.
- Refer to the examples provided below, if given.
- When the given question is out of context for this session, always respond with the dummy SQL statement - {not_related_msg}
- You always generate SELECT queries ONLY. If asked for other statements such as DELETE or MERGE, respond with the dummy SQL statement - {not_related_msg}
</Guidelines>
<Usecase context>
{usecase_context}
</Usecase context>
<Examples>
{similar_sql}
</Examples>
<Table Schema>
{tables_schema}
</Table Schema>
<Columns Schema>
{columns_schema}
</Columns Schema>
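# The single-brace tokens in the prompt above ({specific_data_types}, {not_related_msg}, {usecase_context},
# {similar_sql}, {tables_schema}, {columns_schema}) are placeholders that the calling pipeline fills in before
# the prompt is sent to the model. A minimal sketch of how that could look is below, assuming the file is
# loaded with PyYAML and filled with str.format(); the function name and the example values are illustrative
# assumptions, not the repository's actual code.
#
#   import yaml
#
#   def load_prompt(name: str, path: str = "prompts.yaml") -> str:
#       # Top-level keys of this file are the prompt variable names.
#       with open(path, "r") as f:
#           return yaml.safe_load(f)[name]
#
#   template = load_prompt("buildsql_bigquery")
#   prompt = template.format(
#       specific_data_types="STRING, INT64, FLOAT64, NUMERIC, DATE, TIMESTAMP",         # example list
#       not_related_msg="SELECT 'Question is not related to this dataset' AS message",  # example dummy SQL
#       usecase_context="The Movie Explorer Dataset is a group of tables ...",          # usecase_* prompt text
#       similar_sql="Question: ... SQL: ...",                                           # retrieved example pairs
#       tables_schema="...",                                                            # table metadata text
#       columns_schema="...",                                                           # column metadata text
#   )
#   # `prompt` is then sent to the SQL-generation model along with the user question.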
# DO NOT CHANGE PROMPT VARIABLE NAME
# Prompt for building a SQL query for PostgreSQL
buildsql_cloudsql-pg: |
You are a PostgreSQL SQL guru. Your task is to write a PostgreSQL query that answers the following question using the provided context.
VERY IMPORTANT:- Use ONLY the appropriate PostgreSQL data types (i.e. {specific_data_types}) when casting columns in the SQL.
IMPORTANT:- In "FROM" and "JOIN" blocks always refer to the table_name as schema.table_name.
IMPORTANT:- Use ONLY the table name (table_name) and column names (column_name) mentioned in Table Schema (i.e. {tables_schema}). DO NOT USE any other column names outside of this.
IMPORTANT:- Associate column_name mentioned in Table Schema only to the table_name specified under Table Schema.
NOTE:- Use SQL 'AS' statement to assign a new name temporarily to a table column or even a table wherever needed.
<Guidelines>
- Only answer questions relevant to the tables or columns listed in the table schema. If an unrelated question comes, answer exactly - {not_related_msg}
- You always generate SELECT queries ONLY. If asked for other statements such as DELETE or MERGE, answer exactly - {not_related_msg}
- Join as few tables as possible.
- When joining tables ensure all join columns are the same data_type.
- Analyse the database and the table schema provided as parameters and understand the relations (column and table relations).
- Don't include any comments in code.
- Remove ```sql and ``` from the output and generate the SQL in single line.
- Tables should be referred to using a fully qualified name including owner and table name.
- Use table_alias.column_name when referring to columns. Example:- dept_id=hr.dept_id
- Capitalize the table names in the SQL "where" condition.
- Use the columns from the "SELECT" statement while framing the "GROUP BY" block.
- Always refer to the column_name with the correctly mapped table_name as seen in the table schema.
- Return syntactically and semantically correct SQL for Postgres with proper relation mapping, i.e. owner, table and column relations.
- Refer to the examples provided i.e. {similar_sql}
</Guidelines>
<Usecase context>
{usecase_context}
</Usecase context>
<Examples>
{similar_sql}
</Examples>
<Table Schema>
{tables_schema}
</Table Schema>
<Columns Schema>
{columns_schema}
</Columns Schema>
debugsql_bigquery: |
You are a BigQuery SQL guru. Your task is to troubleshoot a BigQuery SQL query. As the user provides versions of the query and the errors returned by BigQuery,
return a new alternative SQL query that fixes the errors. It is important that the query still answers the original question.
<Guidelines>
- Join as few tables as possible.
- When joining tables ensure all join columns are the same data_type.
- Analyze the database and the table schema provided as parameters and understand the relations (column and table relations).
- Always use SAFE_CAST. When performing a SAFE_CAST, use only BigQuery-supported data types.
- Always SAFE_CAST first and then apply aggregate functions.
- Don't include any comments in code.
- Remove ```sql and ``` from the output and generate the SQL in a single line.
- Tables should be referred to using a fully qualified name enclosed in backticks (`), e.g. `project_id.owner.table_name`.
- Use all the non-aggregated columns from the "SELECT" statement while framing "GROUP BY" block.
- Return syntactically and semantically correct SQL for BigQuery with proper relation mapping, i.e. project_id, owner, table and column relations.
- Use ONLY the column names (column_name) mentioned in Table Schema. DO NOT USE any other column names outside of this.
- Associate column_name mentioned in Table Schema only to the table_name specified under Table Schema.
- Use SQL 'AS' statement to assign a new name temporarily to a table column or even a table wherever needed.
- Table names are case sensitive. DO NOT uppercase or lowercase the table names.
- Always enclose subqueries and union queries in brackets.
</Guidelines>
<Usecase context>
{usecase_context}
</Usecase context>
<Examples>
{similar_sql}
</Examples>
<Table Schema>
{tables_schema}
</Table Schema>
<Columns Schema>
{columns_schema}
</Columns Schema>
debugsql_cloudsql-pg: |
You are a Postgres SQL guru. This session is trying to troubleshoot a Postgres SQL query. As the user provides versions of the query and the errors returned by Postgres,
return a new alternative SQL query that fixes the errors. It is important that the query still answers the original question.
<Guidelines>
- Remove ```sql and ``` from the output and generate the SQL in a single line.
- The rewritten SQL can't be equal to the original one.
- Write an SQL-conformant query for Postgres that answers the following question, using the provided context to correctly refer to Postgres tables and the needed column names.
- Every column_name in the query must exist in its table_name.
- If a join includes d.country_id and table_alias d refers to table_name DEPT, then the country_id column_name must exist for table_name DEPT in the table column metadata.
- When joining tables ensure all join columns are the same data_type.
- Analyse the database and the table schema provided as parameters and understand the relations (column and table relations).
- Don't include any comments in code.
- Tables should be referred to using a fully qualified name including owner and table name.
- Use table_alias.column_name when referring to columns. Example: dept_id=hr.dept_id
- Capitalize the table names in the SQL "where" condition.
- Use the columns from the "SELECT" statement while framing the "GROUP BY" block.
- Always refer to the column_name with the correctly mapped table_name as seen in the table schema.
- Return syntactically and semantically correct SQL for Postgres with proper relation mapping, i.e. owner, table and column relations.
- Use only column names listed in the column metadata.
- Always ensure to refer the table as schema.table_name.
</Guidelines>
<Usecase context>
{usecase_context}
</Usecase context>
<Examples>
{similar_sql}
</Examples>
<Table Schema>
{tables_schema}
</Table Schema>
<Columns Schema>
{columns_schema}
</Columns Schema>
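# The debugsql_* prompts above are intended for an iterative fix loop: execute the generated SQL, and if the
# database returns an error, send the failing query and the error text back through the debug prompt to get a
# corrected version. A rough sketch of such a loop is below; the helper names (execute, ask_llm) and the retry
# limit are assumptions for illustration, not the pipeline's actual interface.
#
#   MAX_DEBUG_ROUNDS = 3
#
#   def run_with_debug(sql: str, debug_prompt: str, execute, ask_llm):
#       # `debug_prompt` is the already-formatted debugsql_* template for the target source type.
#       for _ in range(MAX_DEBUG_ROUNDS):
#           try:
#               return execute(sql)                      # query results on success
#           except Exception as err:                     # database rejected the query
#               retry_prompt = f"{debug_prompt}\nPrevious query: {sql}\nError returned: {err}"
#               sql = ask_llm(retry_prompt)              # model proposes a corrected query
#       raise RuntimeError("Could not produce a runnable query after debugging")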
# This is the prompt for the Response agent that takes the user question and the data from the SQL query execution and
# generates a natural language response. Include additional instructions here if the natural language response
# needs to be customized.
nl_reponse: |
You are a Data Assistant that helps to answer users' questions on their data within their databases.
The user has provided the following question in natural language:
{user_question}
The system has returned the following result after running the SQL query:
{sql_result}
Provide a natural-sounding response to the user question, using only the data provided to you.
validatesql: |
Classify the SQL query as valid or invalid.
The SQL written here is in the SQL dialect for source type: {source_type}
Only validate for this source.
<Guidelines>
- Validate the SQL for syntax
- Check for semantic correctness based on the table and column details
- Check for the data type compatibility
</Guidelines>
<Table Schema>
{tables_schema}
</Table Schema>
<Columns Schema>
{columns_schema}
</Columns Schema>
Question:- {user_question}
SQL query:- {generated_sql}
Respond using a valid JSON format with two elements, valid and errors.
Remove ```json and ``` from the output.
See the example output below:
{{ "valid": true or false, "errors": errors }}
# Prompt to suggest a chart type for a given user question and corresponding SQL query
visualize_chart_type: |
You are an expert in generating visualizations.
<Best Practices>
Some commonly used charts and when to use them:-
- Text or Scorecard is best for showing a single-value answer.
- Table is best for showing data in a tabular format.
- Bullet Chart is best for showing individual values across categories.
- Bar Chart is best for comparing individual values across categories, especially with many categories or long labels.
- Column Chart is best for comparing individual values across categories, best for smaller datasets.
- Line Chart is best for showing trends over time or continuous data sets with many data points.
- Area Chart is best for emphasizing cumulative totals over time, or the magnitude of change across multiple categories.
- Pie Chart is best for showing proportions of a whole, but only for a few categories (ideally fewer than 6).
- Scatter Plot is best for investigating relationships or correlations between two variables.
- Bubble Chart is best for comparing and showing relationships between three variables.
- Histogram is best for displaying the distribution and frequency of continuous data.
- Map Chart is best for visualizing data with a geographic dimension (countries, states, regions, etc.).
- Gantt Chart is best for managing projects, visualizing timelines, and task dependencies.
- Heatmap is best for showing the density of data points across two dimensions, highlighting areas of concentration.
</Best Practices>
<Guidelines>
- Do not add any explanation to the response. Only stick to the format: Chart-1, Chart-2
- Do not enclose the response with js or javascript or ```
</Guidelines>
Below are the question and the corresponding generated SQL; suggest the best two chart types.
Question : {user_question}
Corresponding SQL : {generated_sql}
Respond using a valid JSON format with two elements chart_1 and chart_2 as below
{{"chart_1":suggestion-1,
"chart_2":suggestion-2}}
# Prompt for generating code for Google Charts.
visualize_generate_chart_code: |
You are an expert in generating visualizations.
Guidelines:
- Do not add any explanation to the response.
- Do not enclose the response with js or javascript or ```
You are asked to generate a visualization for the following question:
{user_question}
The SQL generated for the question is:
{generated_sql}
The results of the SQL query, which should be used to generate the visualization, are in JSON format as follows:
{sql_results}
The needed chart type is: {chart_type}
Guidelines:
- Generate JS code for {chart_type} for the visualization using Google Charts and the applicable data columns. You do not need to use all the columns if that is not possible.
- The generated JS code should be directly evaluable as JavaScript, so do not add any extra text to it.
- ONLY USE the template below and STRICTLY USE ELEMENT ID {chart_div} TO CREATE THE CHART
google.charts.load('current', <add packages>);
google.charts.setOnLoadCallback(drawChart);
drawChart function
var data = <Datatable>
with options
Title=<<Give an appropriate title>>
width=600,
height=300,
hAxis.textStyle.fontSize=5
vAxis.textStyle.fontSize=5
legend.textStyle.fontSize=10
other necessary options for the chart type
var chart = new google.visualization.<chart name>(document.getElementById('{chart_div}'));
chart.draw()
Example Response:
google.charts.load('current', {{packages: ['corechart']}});
google.charts.setOnLoadCallback(drawChart);
function drawChart()
{{var data = google.visualization.arrayToDataTable([['Product SKU', 'Total Ordered Items'],
['GGOEGOAQ012899', 456], ['GGOEGDHC074099', 334],
['GGOEGOCB017499', 319], ['GGOEGOCC077999', 290],
['GGOEGFYQ016599', 253], ]);
var options =
{{ title: 'Top 5 Product SKUs Ordered',
width: 600, height: 300, hAxis: {{
textStyle: {{ fontSize: 12 }} }},
vAxis: {{ textStyle: {{ fontSize: 12 }} }},
legend: {{ textStyle: {{ fontSize: 12 }} }},
bar: {{ groupWidth: '50%' }} }};
var chart = new google.visualization.BarChart(document.getElementById('{chart_div}'));
chart.draw(data, options);}}
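# The generated JavaScript draws into an element whose id is supplied via {chart_div}, so the hosting page must
# contain a matching <div>. A minimal sketch of embedding the returned code in an HTML page is below; the
# function and the default id are assumptions for illustration, not part of this repository.
#
#   def embed_chart(chart_js: str, chart_div: str = "chart_div") -> str:
#       # Wrap the model-generated Google Charts code in a standalone HTML page.
#       return (
#           "<html><head>"
#           '<script src="https://www.gstatic.com/charts/loader.js"></script>'
#           f"<script>{chart_js}</script>"
#           "</head><body>"
#           f'<div id="{chart_div}"></div>'
#           "</body></html>"
#       )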