Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Using Presto with Cassandra on Docker (DATE field type error) #24293

Open
gmitis opened this issue Dec 21, 2024 · 0 comments
Open

Using Presto with Cassandra on Docker (DATE field type error) #24293

gmitis opened this issue Dec 21, 2024 · 0 comments
Labels

Comments

@gmitis
Copy link

gmitis commented Dec 21, 2024

Issue:

Specific error is:

Error: Codec not found for requested operation: ['org.apache.cassandra.db.marshal.SimpleDateType' <-> com.datastax.driver.core.LocalDate]

Using this option does not fix the issue for me:

Please specify cassandra.protocol-version=V4 in cassandra.properties.

Originally posted by @ebyhr in #587

Configuration:

I have set up cassandra and presto with docker as seen below:

services:
  presto-coordinator:
    image: prestodb/presto:latest
    container_name: presto-coordinator
    hostname: presto-coordinator
    ports:
      - "8080:8080"
    environment:
      - CATALOG_MANAGEMENT=dynamic
    volumes:
      - ./presto-coordinator/configuration/node.properties:/opt/presto-server/etc/node.properties
      - ./presto-coordinator/configuration/jvm.config:/opt/presto-server/etc/jvm.config
      - ./presto-coordinator/configuration/log.properties:/opt/presto-server/etc/log.properties
      - ./presto-coordinator/configuration/config.properties:/opt/presto-server/etc/config.properties
      - ./other_data:/opt/presto-server/etc/other_data
      - ./datasources/catalog:/opt/presto-server/etc/catalog
    networks:
      presto:
        ipv4_address: 10.18.0.8

  cassandra:
    image: cassandra:5.0.2-jammy
    container_name: cassandra
    hostname: cassandra
    environment:
      - CASSANDRA_CLUSTER_NAME="test"
      - CASSANDRA_SEEDS=cassandra
      - CASSANDRA_PASSWORD_SEEDER=true
      - CASSANDRA_USERNAME=root
      - CASSANDRA_PASSWORD=root
    ports:
      - "9042:9042"
    volumes:
      - cassandra_data:/var/lib/cassandra 
      - ./data/cassandra_data:/data
      - ./datasources/db_population_scripts/cassandra_tables.cql:/cassandra_tables.cql
    healthcheck:
      test: ["CMD", "cqlsh", "-u root", "-p root", "-e describe keyspaces"]
      interval: 15s
      timeout: 10s
      retries: 10
    networks:
      presto:
        ipv4_address: 10.18.0.118


  cassandra-load-keyspace:
    container_name: cassandra-load-keyspace
    image: cassandra:5.0.2-jammy
    depends_on:
      cassandra:
        condition: service_healthy
    volumes:
      - ./datasources/db_population_scripts/cassandra_tables.cql:/cassandra_tables.cql
      - ./data/cassandra_data:/data
    command: /bin/bash -c "echo Loading cassandra keyspace && cqlsh 10.18.0.118 -f /cassandra_tables.cql"
    networks:
      presto:
        ipv4_address: 10.18.0.119

sample data is on web_page.csv:

1,AAAAAAAABAAAAAAA,1997-09-03,,2450810,2452620,Y,98539,http://www.foo.com,welcome,2531,8,3,4
2,AAAAAAAACAAAAAAA,1997-09-03,2000-09-02,2450814,2452580,N,,http://www.foo.com,protected,1564,4,3,1
3,AAAAAAAACAAAAAAA,2000-09-03,,2450814,2452611,N,,http://www.foo.com,feedback,1564,4,3,4
4,AAAAAAAAEAAAAAAA,1997-09-03,1999-09-03,2450812,2452579,N,,http://www.foo.com,general,3732,18,7,1
5,AAAAAAAAEAAAAAAA,1999-09-04,2001-09-02,2450812,2452597,N,,http://www.foo.com,welcome,3732,18,3,1
6,AAAAAAAAEAAAAAAA,2001-09-03,,2450814,2452597,N,,http://www.foo.com,ad,3732,18,7,4
7,AAAAAAAAHAAAAAAA,1997-09-03,,2450815,2452574,N,,http://www.foo.com,feedback,3034,18,7,4
8,AAAAAAAAIAAAAAAA,1997-09-03,2000-09-02,2450815,2452646,Y,101898,http://www.foo.com,protected,3128,12,2,4
9,AAAAAAAAIAAAAAAA,2000-09-03,,2450807,2452579,Y,384146,http://www.foo.com,welcome,3128,13,5,3
10,AAAAAAAAKAAAAAAA,1997-09-03,1999-09-03,,2452623,N,,http://www.foo.com,,,,,

where cassandra_tables.cql is :

create keyspace adis with replication={'class':'SimpleStrategy', 'replication_factor': 1};
use adis;

-- Create the table `web_page` in Cassandra
CREATE TABLE web_page (
    wp_web_page_sk            INT, 
    wp_web_page_id            TEXT,                     
    wp_rec_start_date         DATE,                       
    wp_rec_end_date           DATE,
    wp_creation_date_sk       INT,
    wp_access_date_sk         INT,
    wp_autogen_flag           TEXT,                      
    wp_customer_sk            INT,
    wp_url                    TEXT,                      
    wp_type                   TEXT,
    wp_char_count             INT,
    wp_link_count             INT,
    wp_image_count            INT,
    wp_max_ad_count           INT,
    PRIMARY KEY(wp_web_page_sk)
);

COPY web_page(wp_web_page_sk, wp_web_page_id, wp_rec_start_date, wp_rec_end_date, wp_creation_date_sk, wp_access_date_sk, wp_autogen_flag, wp_customer_sk, wp_url, wp_type, wp_char_count, wp_link_count, wp_image_count, wp_max_ad_count) 
FROM '/data/web_page.csv'
WITH HEADER=TRUE AND NULL='';

And executing the following commands yields the codec error:

docker exec -it presto-coordinator /bin/bash

[root@presto-coordinator /]# cat opt/presto-server/etc/catalog/cassandra.properties 
connector.name=cassandra
cassandra.contact-points= 10.18.0.118
cassandra.native-protocol-port=9042
cassandra.allow-drop-table=true
cassandra.protocol-version=V4
cassandra.username=root
cassandra.password=root

[root@presto-coordinator /]# ./opt/presto-cli --server localhost:8080 --catalog cassandra --schema adis  --execute 'select * from web_page limit 10;'
Query 20241221_102007_00007_sakpn failed: Codec not found for requested operation: ['org.apache.cassandra.db.marshal.SimpleDateType' <-> com.datastax.driver.core.LocalDate]

[root@presto-coordinator /]# ./opt/presto-cli --server localhost:8080 --catalog cassandra --schema adis  --execute ' describe web_page ;'
"wp_web_page_sk","integer","",""
"wp_access_date_sk","integer","",""
"wp_autogen_flag","varchar","",""
"wp_char_count","integer","",""
"wp_creation_date_sk","integer","",""
"wp_customer_sk","integer","",""
"wp_image_count","integer","",""
"wp_link_count","integer","",""
"wp_max_ad_count","integer","",""
"wp_rec_end_date","date","",""
"wp_rec_start_date","date","",""
"wp_type","varchar","",""
"wp_url","varchar","",""
"wp_web_page_id","varchar","",""
[root@presto-coordinator /]# 

Repo:
https://github.com/gmitis/ADIS_project24

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
Projects
Status: 🆕 Unprioritized
Development

No branches or pull requests

1 participant