Skip to content

Apr 04: Creating Data, DB-API

Learning Objectives

After today's class, you should be able to:

  • Write SQL scripts that import data using \copy (or COPY).
  • Run SELECT, INSERT, and DELETE queries using psycopg.

Lesson Outline

Announcement: HW7 has been postponed. We'll focus on GP3 today.

GP3 Scripts [30 min]

Work Time [15 min]

  • Coordinate plans for finishing GP3
  • Start/keep writing your copy scripts
  • Finish setting up Flask and Psycopg

Python DB-API [30 min]

  • Running SQL queries in Python
  • Representing data (pros and cons)
    • With tuples/lists/dicts
    • With classes/objects

Example Code

Note: These files are a starting point. We'll add more code during class.

demo_create.sql
--------------------------------------------------------------------------------
-- Drop Tables
--------------------------------------------------------------------------------

DROP TABLE IF EXISTS hotel_phone;
DROP TABLE IF EXISTS hotel;

--------------------------------------------------------------------------------
-- Create Tables
--------------------------------------------------------------------------------

CREATE TABLE hotel (
    hotel_id integer NOT NULL,
    name text NOT NULL,
    address text NOT NULL,
    year integer NOT NULL
);

ALTER TABLE hotel OWNER TO absent;

COMMENT ON TABLE hotel IS 'Hotel where guests can stay';


CREATE TABLE hotel_phone (
    hotel_id integer NOT NULL,
    sort integer NOT NULL,
    phone text NOT NULL,
    label text
);

ALTER TABLE hotel_phone OWNER TO absent;

COMMENT ON TABLE hotel_phone IS 'Phone numbers for a hotel';
demo_alter.sql
--------------------------------------------------------------------------------
-- Primary Keys
--------------------------------------------------------------------------------

ALTER TABLE hotel
    ADD PRIMARY KEY (hotel_id);

ALTER TABLE hotel_phone
    ADD PRIMARY KEY (hotel_id, sort);

--------------------------------------------------------------------------------
-- Foreign Keys
--------------------------------------------------------------------------------

ALTER TABLE hotel_phone
    ADD FOREIGN KEY (hotel_id) REFERENCES hotel;
demo_load.sql

Note: When using STDIN, the columns must be tab-delimited.

\copy hotel FROM STDIN
1   Hotel Madison   710 S Main St, Harrisonburg, VA 22801   2018
2   The Plaza Hotel 768 5th Ave, New York, NY 10019 1907
\.

\copy hotel_phone FROM hotel_phone.csv WITH CSV HEADER
Tip: Connecting both on/off campus
  • When on campus, you can connect directly to data.cs.jmu.edu.
  • When off campus, first set up an ssh tunnel to stu.cs.jmu.edu:

    ssh -L 5432:data.cs.jmu.edu:5432 username@stu.cs.jmu.edu
    
  • Once ssh is running, connect to localhost to use the tunnel.

  • The following code automatically figures out how to connect:

    import psycopg
    import socket
    
    # Determine whether running on/off campus
    # Note: PGUSER and PGPASSWORD must be set
    try:
        socket.gethostbyname("data.cs.jmu.edu")
        DSN = "host=data.cs.jmu.edu dbname=postgres"
    except:
        DSN = "host=localhost dbname=postgres"
    
    with psycopg.connect(DSN) as conn:
        with conn.cursor() as cur:
            cur.execute("SELECT 'It works!'")
            print(cur.fetchone())
    

Posted After Class

demo_hotel.py
"""Queries for the hotel table."""

import psycopg
import socket

# Determine whether running on/off campus
# Note: PGUSER and PGPASSWORD must be set
try:
    socket.gethostbyname("data.cs.jmu.edu")
    DSN = "host=data.cs.jmu.edu dbname=absent"
except:
    DSN = "host=localhost dbname=absent"


def select_all():
    with psycopg.connect(DSN) as conn:
        with conn.cursor() as cur:
            cur.execute("SELECT * FROM hotel")
            return cur.fetchall()


def select(hotel_id):
    with psycopg.connect(DSN) as conn:
        with conn.cursor() as cur:
            cur.execute("SELECT * FROM hotel WHERE hotel_id = %s", [hotel_id])
            return cur.fetchone()


def insert(data):
    with psycopg.connect(DSN) as conn:
        with conn.cursor() as cur:
            cur.execute("INSERT INTO hotel VALUES (%s, %s, %s, %s)", data)
            return cur.rowcount


def delete(hotel_id):
    with psycopg.connect(DSN) as conn:
        with conn.cursor() as cur:
            cur.execute("DELETE FROM hotel WHERE hotel_id = %s", [hotel_id])
            return cur.rowcount


if __name__ == "__main__":
    print(select_all())
    count = insert((2, "Hotel Madison", "710 S Main St, Harrisonburg, VA 22801", 2018))
    print("Inserted", count, "row(s)")
    print(select(2))
    count = delete(2)
    print("Deleted", count, "row(s)")
    print(select_all())
demo_generate.py
"""Generate fake data for the hotel database."""

from faker import Faker
import random

fake = Faker()


def make_hotels():
    hotels = []
    for id in range(1, 21):
        h = []
        h.append(id)
        h.append(fake.color_name() + " Hotel")
        h.append(fake.street_address())
        h.append(fake.city())
        h.append(fake.country())
        h.append(random.randint(1900, 2024))
        hotels.append(h)
    return hotels


def make_hotel_phones(hotels):
    hotel_phones = []
    for h in hotels:
        # a hotel has between 1 and 4 phone numbers
        phones = random.randint(1, 4)
        # phones+1 because the end of the range is not inclusive
        for i in range(1, phones+1):
            hp = []
            hp.append(h[0])  # hotel id
            hp.append(i)
            hp.append(fake.phone_number())
            # make a random choice from a list
            hp.append(random.choice(["Front Desk", "Reservations", "", "Security"]))
            hotel_phones.append(hp)
    return hotel_phones


if __name__ == "__main__":
    # Output everything tab-separated
    hotels = make_hotels()
    for h in hotels:
        for attr in h:
            print(attr, end="\t")
        print()
    hotel_phones = make_hotel_phones(hotels)
    for hp in hotel_phones:
        for attr in hp:
            print(attr, end="\t")
        print()