Difference between revisions of "Relational and non-relational databases"
From Gridkaschool
Line 20: | Line 20: | ||
except: pass |
except: pass |
||
− | # create table |
+ | # create table and enable transactions |
p_cur.execute("CREATE TABLE large_table (l CHAR, s VARCHAR(255), n INTEGER)") |
p_cur.execute("CREATE TABLE large_table (l CHAR, s VARCHAR(255), n INTEGER)") |
||
+ | p_con.set_isolation_level(1) |
||
# insert random data |
# insert random data |
||
time_start = time.time() |
time_start = time.time() |
||
+ | for i in xrange(NR_OF_ROWS): |
||
− | |||
+ | p_cur.execute("INSERT INTO large_table VALUES ('%s', '%s', %i)" % (chr(random.randrange(65, 91)), |
||
− | # TODO: INSERT NR_OF_ROWS FAST, USE THE FOLLOWING RANDOM DATA AS VALUES |
||
− | + | ''.join([chr(random.randrange(65, 91)) for i in xrange(1,16)]), |
|
+ | random.randint(0,1000000))) |
||
− | # s --> ''.join([chr(random.randrange(65, 91)) for i in xrange(1,16)]) |
||
+ | p_con.commit() |
||
− | # n --> random.randint(0,1000000) |
||
− | |||
print 'insert', time.time()-time_start |
print 'insert', time.time()-time_start |
||
Line 38: | Line 38: | ||
p_cur.execute("ANALYZE large_table") |
p_cur.execute("ANALYZE large_table") |
||
+ | p_cur.execute("EXPLAIN SELECT COUNT(*) FROM large_table") |
||
− | # TODO: ANALYZE THE QUERY PLAN OF SOME QUERIES, SELECTING EVERYTHING, OR USING COMBINATIONS OF COLUMNS |
||
+ | for row in p_cur: |
||
+ | print row |
||
+ | |||
+ | p_cur.execute("EXPLAIN SELECT * FROM large_table") |
||
+ | for row in p_cur: |
||
+ | print row |
||
+ | |||
+ | p_cur.execute("EXPLAIN SELECT * FROM large_table WHERE l='A'") |
||
+ | for row in p_cur: |
||
+ | print row |
||
+ | |||
+ | p_cur.execute("EXPLAIN SELECT * FROM large_table WHERE s='ASDFGH'") |
||
+ | for row in p_cur: |
||
+ | print row |
||
+ | |||
+ | p_cur.execute("EXPLAIN SELECT * FROM large_table WHERE n=3") |
||
+ | for row in p_cur: |
||
+ | print row |
||
+ | |||
+ | p_cur.execute("EXPLAIN SELECT * FROM large_table WHERE l='A' AND n=3") |
||
+ | for row in p_cur: |
||
+ | print row |
||
+ | |||
+ | p_cur.execute("EXPLAIN SELECT * FROM large_table WHERE l='A' AND s='ASDF' AND n=3") |
||
+ | for row in p_cur: |
||
+ | print row |
||
time_start = time.time() |
time_start = time.time() |
||
+ | p_cur.execute("CREATE INDEX idxu_large_table_lsn ON large_table(l, s, n)") |
||
− | # TODO: CREATE AN INDEX |
||
print 'index', time.time()-time_start |
print 'index', time.time()-time_start |
||
+ | p_cur.execute("EXPLAIN SELECT COUNT(*) FROM large_table") |
||
− | # TODO: ANALYZE THE QUERIES AGAIN |
||
+ | for row in p_cur: |
||
+ | print row |
||
+ | |||
+ | p_cur.execute("EXPLAIN SELECT * FROM large_table") |
||
+ | for row in p_cur: |
||
+ | print row |
||
+ | |||
+ | p_cur.execute("EXPLAIN SELECT * FROM large_table WHERE l='A'") |
||
+ | for row in p_cur: |
||
+ | print row |
||
+ | |||
+ | p_cur.execute("EXPLAIN SELECT * FROM large_table WHERE s='ASDFGH'") |
||
+ | for row in p_cur: |
||
+ | print row |
||
+ | |||
+ | p_cur.execute("EXPLAIN SELECT * FROM large_table WHERE n=3") |
||
+ | for row in p_cur: |
||
+ | print row |
||
+ | |||
+ | p_cur.execute("EXPLAIN SELECT * FROM large_table WHERE l='A' AND n=3") |
||
+ | for row in p_cur: |
||
+ | print row |
||
+ | |||
+ | p_cur.execute("EXPLAIN SELECT * FROM large_table WHERE l='A' AND s='ASDF' AND n=3") |
||
+ | for row in p_cur: |
||
+ | print row |
||
+ | |||
# close everything |
# close everything |
||
p_cur.close() |
p_cur.close() |
||
− | p_con.close() |
+ | p_con.close()</pre> |
− | |||
− | </pre> |
||
Revision as of 15:25, 27 August 2013
import sys import random import uuid import time import psycopg2 NR_OF_ROWS = 1000000 # connect to the database and set transaction level to autocommit p_con = psycopg2.connect('host=localhost user=student password=student dbname=student') p_con.set_isolation_level(0) # 0 (AUTOCOMMIT), 1 (READ COMMIT), 3 (SERIALIZE) # get cursor p_cur = p_con.cursor() # drop old tables if they exist, ignore otherwise try: p_cur.execute("DROP TABLE large_table") except: pass # create table and enable transactions p_cur.execute("CREATE TABLE large_table (l CHAR, s VARCHAR(255), n INTEGER)") p_con.set_isolation_level(1) # insert random data time_start = time.time() for i in xrange(NR_OF_ROWS): p_cur.execute("INSERT INTO large_table VALUES ('%s', '%s', %i)" % (chr(random.randrange(65, 91)), ''.join([chr(random.randrange(65, 91)) for i in xrange(1,16)]), random.randint(0,1000000))) p_con.commit() print 'insert', time.time()-time_start # query plans p_cur.execute("ANALYZE large_table") p_cur.execute("EXPLAIN SELECT COUNT(*) FROM large_table") for row in p_cur: print row p_cur.execute("EXPLAIN SELECT * FROM large_table") for row in p_cur: print row p_cur.execute("EXPLAIN SELECT * FROM large_table WHERE l='A'") for row in p_cur: print row p_cur.execute("EXPLAIN SELECT * FROM large_table WHERE s='ASDFGH'") for row in p_cur: print row p_cur.execute("EXPLAIN SELECT * FROM large_table WHERE n=3") for row in p_cur: print row p_cur.execute("EXPLAIN SELECT * FROM large_table WHERE l='A' AND n=3") for row in p_cur: print row p_cur.execute("EXPLAIN SELECT * FROM large_table WHERE l='A' AND s='ASDF' AND n=3") for row in p_cur: print row time_start = time.time() p_cur.execute("CREATE INDEX idxu_large_table_lsn ON large_table(l, s, n)") print 'index', time.time()-time_start p_cur.execute("EXPLAIN SELECT COUNT(*) FROM large_table") for row in p_cur: print row p_cur.execute("EXPLAIN SELECT * FROM large_table") for row in p_cur: print row p_cur.execute("EXPLAIN SELECT * FROM large_table WHERE l='A'") for row in p_cur: print row p_cur.execute("EXPLAIN SELECT * FROM large_table WHERE s='ASDFGH'") for row in p_cur: print row p_cur.execute("EXPLAIN SELECT * FROM large_table WHERE n=3") for row in p_cur: print row p_cur.execute("EXPLAIN SELECT * FROM large_table WHERE l='A' AND n=3") for row in p_cur: print row p_cur.execute("EXPLAIN SELECT * FROM large_table WHERE l='A' AND s='ASDF' AND n=3") for row in p_cur: print row # close everything p_cur.close() p_con.close()
import uuid import time import sqlite3 import MySQLdb import MySQLdb.cursors import psycopg2 NR_OF_ROWS = 1000000 # connect to the database and set transaction level to autocommit s_con = sqlite3.connect(database='/tmp/student.db', isolation_level=None) # None, 'DEFERRED', 'IMMEDIATE', 'EXCLUSIVE' m_con = MySQLdb.connect(host='localhost', user='student', passwd='student', db='student', cursorclass = MySQLdb.cursors.SSCursor) m_con.autocommit(True) # True, False p_con = psycopg2.connect('host=localhost user=student password=student dbname=student') p_con.set_isolation_level(0) # 0 (AUTOCOMMIT), 1 (READ COMMIT), 3 (SERIALIZE) # get cursor s_cur = s_con.cursor() m_cur = m_con.cursor() p_cur = p_con.cursor() # drop old tables if they exist, ignore otherwise try: s_cur.execute("DROP TABLE large_table") except: pass try: m_cur.execute("DROP TABLE large_table") except: pass try: p_cur.execute("DROP TABLE large_table") except: pass # create table s_cur.execute("CREATE TABLE large_table (id VARCHAR(255) PRIMARY KEY)") m_cur.execute("CREATE TABLE large_table (id VARCHAR(255) PRIMARY KEY) Engine=InnoDB") p_cur.execute("CREATE TABLE large_table (id VARCHAR(255) PRIMARY KEY)") # insert lots of rows and measure print 'sqlite:', time_start = time.time() for i in xrange(NR_OF_ROWS): pass # YOUR CODE HERE, USE str(uuid.uuid4()) print time.time()-time_start print 'mysql:', time_start = time.time() for i in xrange(NR_OF_ROWS): pass # YOUR CODE HERE, USE str(uuid.uuid4()) print time.time()-time_start print 'postgresql:', time_start = time.time() for i in xrange(NR_OF_ROWS): pass # YOUR CODE HERE, USE str(uuid.uuid4()) print time.time()-time_start # drop table s_cur.execute("DROP TABLE large_table") m_cur.execute("DROP TABLE large_table") p_cur.execute("DROP TABLE large_table") # close everything s_cur.close() m_cur.close() p_cur.close() s_con.close() m_con.close() p_con.close()
import sqlite3 import MySQLdb import MySQLdb.cursors import psycopg2 # connect to the database and set transaction level to autocommit s_con = sqlite3.connect(database='/tmp/student.db', isolation_level=None) # None, 'DEFERRED', 'IMMEDIATE', 'EXCLUSIVE' m_con = MySQLdb.connect(host='localhost', user='student', passwd='student', db='student', cursorclass = MySQLdb.cursors.SSCursor) m_con.autocommit(True) # True, False p_con = psycopg2.connect('host=localhost user=student password=student dbname=student') p_con.set_isolation_level(0) # 0 (AUTOCOMMIT), 1 (READ COMMIT), 3 (SERIALIZE) # get cursor s_cur = s_con.cursor() m_cur = m_con.cursor() p_cur = p_con.cursor() # drop old tables if they exist, ignore otherwise try: s_cur.execute("DROP TABLE test_table") except: pass try: m_cur.execute("DROP TABLE test_table") except: pass try: p_cur.execute("DROP TABLE test_table") except: pass # create table s_cur.execute("CREATE TABLE test_table (id INTEGER PRIMARY KEY, value VARCHAR(255))") m_cur.execute("CREATE TABLE test_table (id INTEGER PRIMARY KEY, value VARCHAR(255)) Engine=InnoDB") p_cur.execute("CREATE TABLE test_table (id INTEGER PRIMARY KEY, value VARCHAR(255))") # insert data s_cur.execute("INSERT INTO test_table(id, value) VALUES (0, 'my first value')") s_cur.execute("INSERT INTO test_table(id, value) VALUES (0, 'my second value')") s_cur.execute("INSERT INTO test_table(id, value) VALUES (0, 'my second value')") m_cur.execute("INSERT INTO test_table(id, value) VALUES (0, 'my first value')") m_cur.execute("INSERT INTO test_table(id, value) VALUES (0, 'my second value')") m_cur.execute("INSERT INTO test_table(id, value) VALUES (0, 'my second value')") p_cur.execute("INSERT INTO test_table(id, value) VALUES (0, 'my first value')") p_cur.execute("INSERT INTO test_table(id, value) VALUES (0, 'my second value')") p_cur.execute("INSERT INTO test_table(id, value) VALUES (0, 'my second value')") # selecting data (everything) print 'sqlite' s_cur.execute("SELECT id, value FROM test_table") print s_cur.fetchall() print 'mysql' m_cur.execute("SELECT id, value FROM test_table") print m_cur.fetchall() print 'postgresql' p_cur.execute("SELECT id, value FROM test_table") print p_cur.fetchall() # selecting data (iteratively) print 'sqlite' s_cur.execute("SELECT id, value FROM test_table") for row in s_cur: print row print 'mysql' m_cur.execute("SELECT id, value FROM test_table") for row in m_cur: print row print 'postgresql' p_cur.execute("SELECT id, value FROM test_table") for row in p_cur: print row # close everything s_cur.close() m_cur.close() p_cur.close() s_con.close() m_con.close() p_con.close()
import sqlite3 import MySQLdb import psycopg2 s_con = sqlite3.connect(database='/home/username/student.db') m_con = MySQLdb.connect(host='localhost', user='student', passwd='student', db='student') p_con = psycopg2.connect('host=localhost user=student password=student dbname=student') s_con.close() m_con.close() p_con.close()
CREATE TABLE test_table (id integer, name varchar(255));
Hosts:
ssh <username>@<host>.scc.kit.edu -p24
- gks-148
- gks-149
- gks-150
- gks-151
- gks-152
- gks-153
- gks-154
- gks-155
- gks-156
- gks-157
- gks-158
- gks-159
- gks-160
- gks-161
- gks-162
- gks-163
- gks-164
- gks-165
- gks-166
- gks-167
- gks-168
- gks-169
- gks-170
- gks-171
- gks-172
- gks-173
- gks-174