Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

miniDB Project 2022-2023 P20176, P20235 #218

Open
wants to merge 9 commits into
base: master
Choose a base branch
from
Binary file added .DS_Store
Binary file not shown.
35 changes: 30 additions & 5 deletions mdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@
import shutil
sys.path.append('miniDB')

from database import Database
from table import Table
from miniDB.database import Database
from miniDB.table import Table
# art font is "big"
art = '''
_ _ _____ ____
Expand Down Expand Up @@ -74,7 +74,7 @@ def create_query_plan(query, keywords, action):

if action == 'create view':
dic['as'] = interpret(dic['as'])

if action=='select':
dic = evaluate_from_clause(dic)

Expand All @@ -95,17 +95,24 @@ def create_query_plan(query, keywords, action):

if action=='create table':
args = dic['create table'][dic['create table'].index('('):dic['create table'].index(')')+1]
#print("args: ", args)

dic['create table'] = dic['create table'].removesuffix(args).strip()
arg_nopk = args.replace('primary key', '')[1:-1]


arglist = [val.strip().split(' ') for val in arg_nopk.split(',')]
dic['column_names'] = ','.join([val[0] for val in arglist])
dic['column_types'] = ','.join([val[1] for val in arglist])
dic['columns_unique'] = ','.join([val[0] for val in arglist if 'unique' in val]) if 'unique' in args else None # add the columns declared with the unique constraint to the dictionary

if 'primary key' in args:
arglist = args[1:-1].split(' ')
dic['primary key'] = arglist[arglist.index('primary')-2]
else:
dic['primary key'] = None



if action=='import':
dic = {'import table' if key=='import' else key: val for key, val in dic.items()}

Expand All @@ -121,6 +128,21 @@ def create_query_plan(query, keywords, action):
else:
dic['force'] = False


if action=='create index':
'''
For creating the B+ Tree index on Unique columns and PK
'''

dic['on']=ql[3] # Table name
if "(" in ql:
# If the user has specified the name of the column

dic['column'] = ql[5] # name of the column
else:
# The index will be created on the PK of the table as normal
dic['column']=None

return dic


Expand All @@ -134,6 +156,7 @@ def evaluate_from_clause(dic):
if from_split[0] == '(' and from_split[-1] == ')':
subquery = ' '.join(from_split[1:-1])
dic['from'] = interpret(subquery)


join_idx = [i for i,word in enumerate(from_split) if word=='join' and not in_paren(from_split,i)]
on_idx = [i for i,word in enumerate(from_split) if word=='on' and not in_paren(from_split,i)]
Expand All @@ -152,6 +175,7 @@ def evaluate_from_clause(dic):

if join_dic['left'].startswith('(') and join_dic['left'].endswith(')'):
join_dic['left'] = interpret(join_dic['left'][1:-1].strip())


if join_dic['right'].startswith('(') and join_dic['right'].endswith(')'):
join_dic['right'] = interpret(join_dic['right'][1:-1].strip())
Expand All @@ -175,7 +199,7 @@ def interpret(query):
'unlock table': ['unlock table', 'force'],
'delete from': ['delete from', 'where'],
'update table': ['update table', 'set', 'where'],
'create index': ['create index', 'on', 'using'],
'create index': ['create index', 'on','column','using'], #added column for unique constraint
'drop index': ['drop index'],
'create view' : ['create view', 'as']
}
Expand All @@ -189,6 +213,7 @@ def interpret(query):
if query.startswith(kw):
action = kw

#return Optimizer._build_equivalent_query_plan(query)
return create_query_plan(query, kw_per_action[action]+[';'], action)

def execute_dic(dic):
Expand Down
160 changes: 138 additions & 22 deletions miniDB/database.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
import readline
from tabulate import tabulate

from hash import HashTable

sys.path.append(f'{os.path.dirname(os.path.dirname(os.path.abspath(__file__)))}/miniDB')
from miniDB import table
sys.modules['table'] = table
Expand Down Expand Up @@ -54,7 +56,7 @@ def __init__(self, name, load=True, verbose = True):
self.create_table('meta_length', 'table_name,no_of_rows', 'str,int')
self.create_table('meta_locks', 'table_name,pid,mode', 'str,int,str')
self.create_table('meta_insert_stack', 'table_name,indexes', 'str,list')
self.create_table('meta_indexes', 'table_name,index_name', 'str,str')
self.create_table('meta_indexes', 'table_name,index_name,column_name,index_type', 'str,str,str,str') #!!!!
self.save_database()

def save_database(self):
Expand Down Expand Up @@ -101,7 +103,7 @@ def _update(self):
self._update_meta_insert_stack()


def create_table(self, name, column_names, column_types, primary_key=None, load=None):
def create_table(self, name, column_names, column_types, columns_unique=None,primary_key=None, load=None):
'''
This method create a new table. This table is saved and can be accessed via db_object.tables['table_name'] or db_object.table_name

Expand All @@ -113,7 +115,8 @@ def create_table(self, name, column_names, column_types, primary_key=None, load=
load: boolean. Defines table object parameters as the name of the table and the column names.
'''
# print('here -> ', column_names.split(','))
self.tables.update({name: Table(name=name, column_names=column_names.split(','), column_types=column_types.split(','), primary_key=primary_key, load=load)})

self.tables.update({name: Table(name=name, column_names=column_names.split(','), column_types=column_types.split(','), primary_key=primary_key, load=load,columns_unique=columns_unique.split(',') if columns_unique is not None else None)}) #columns_unique.split(',')
# self._name = Table(name=name, column_names=column_names, column_types=column_types, load=load)
# check that new dynamic var doesnt exist already
# self.no_of_tables += 1
Expand Down Expand Up @@ -358,20 +361,89 @@ def select(self, columns, table_name, condition, distinct=None, order_by=None, \
return table_name._select_where(columns, condition, distinct, order_by, desc, limit)

if condition is not None:
condition_column = split_condition(condition)[0]
#condition_column = split_condition(condition)[0]
#condition_column = condition.split()[0]

if " between " in condition:
condition_column = condition.split(" ")[0]
elif " not " in condition:
condition_column = condition.split(" ")[0]
elif " and " in condition or " or " in condition:
condition_column = condition.split(" ")[0]
else:
condition_column = split_condition(condition)[0]

else:
condition_column = ''


# self.lock_table(table_name, mode='x')
if self.is_locked(table_name):
return
#if self.is_locked(table_name):
# return
index_type = None

#print("The condition column is:", condition_column)
#print("The unique column is:",self.tables[table_name].columns_unique)

#print("Is the column unique: ", conditioncheck)

if self._has_index(table_name) and condition_column==self.tables[table_name].column_names[self.tables[table_name].pk_idx]:
index_name = self.select('*', 'meta_indexes', f'table_name={table_name}', return_object=True).column_by_name('index_name')[0]
bt = self._load_idx(index_name)
table = self.tables[table_name]._select_where_with_btree(columns, bt, condition, distinct, order_by, desc, limit)
# A table may have more than one index, so find the position of the row for the condition column in the meta_indexes table.
idx = self.select('*', 'meta_indexes', f'table_name={table_name}', return_object=True).column_by_name('column_name').index(condition_column)
#print(idx)
index_name = self.select('*', 'meta_indexes', f'table_name={table_name}', return_object=True).column_by_name('index_name')[idx]
index_type = self.select('*', 'meta_indexes', f'table_name={table_name}', return_object=True).column_by_name('index_type')[idx]
#print(index_type)
#print(self.select('*', 'meta_indexes', f'table_name={table_name}', return_object=True).column_by_name('index_type')[idx])
if index_type == 'hash':
ht = self._load_idx(index_name)
try:
print("Hash select")
table = self.tables[table_name]._select_where_with_hash(columns, ht, condition, distinct, order_by, desc, limit)
except:
table = self.tables[table_name]._select_where(columns, condition, distinct, order_by, desc, limit)
elif index_type == 'btree':
bt = self._load_idx(index_name)
try:
table = self.tables[table_name]._select_where_with_btree(columns, bt, condition, distinct, order_by, desc, limit)
except:
table = self.tables[table_name]._select_where(columns, condition, distinct, order_by, desc, limit)
else:
#print("normal select")
table = self.tables[table_name]._select_where(columns, condition, distinct, order_by, desc, limit)

if self._has_index(table_name) and any(condition_column in x for x in self.tables[table_name].columns_unique) and condition_column != '' :
# A table may have more than one index, so find the position of the row for the condition column in the meta_indexes table.
idx = self.select('*', 'meta_indexes', f'table_name={table_name}', return_object=True).column_by_name('column_name').index(condition_column)
#print(idx)
index_name = self.select('*', 'meta_indexes', f'table_name={table_name}', return_object=True).column_by_name('index_name')[idx]

#print("index_name; ", index_name)
index_type = self.select('*', 'meta_indexes', f'table_name={table_name}', return_object=True).column_by_name('index_type')[idx]
#print(index_type)
#print(self.select('*', 'meta_indexes', f'table_name={table_name}', return_object=True).column_by_name('index_type')[idx])
if index_type == 'btree':
bt = self._load_idx(index_name)
#print("btree select")
try:
table = self.tables[table_name]._select_where_with_btree(columns, bt, condition, distinct, order_by, desc, limit)
except:
table = self.tables[table_name]._select_where(columns, condition, distinct, order_by, desc, limit)
elif index_type == 'hash':
ht = self._load_idx(index_name)
try:
print("Hash select")
table = self.tables[table_name]._select_where_with_hash(columns, ht, condition, distinct, order_by, desc, limit)
except:
table = self.tables[table_name]._select_where(columns, condition, distinct, order_by, desc, limit)

else:
#print("normal select")
table = self.tables[table_name]._select_where(columns, condition, distinct, order_by, desc, limit)




# self.unlock_table(table_name)
if save_as is not None:
table._name = save_as
Expand Down Expand Up @@ -650,7 +722,7 @@ def _update_meta_insert_stack_for_tb(self, table_name, new_stack):


# indexes
def create_index(self, index_name, table_name, index_type='btree'):
def create_index(self, index_name, table_name,column_name, index_type): #='btree'
'''
Creates an index on a specified table with a given name.
Important: An index can only be created on the primary key or on a column with the UNIQUE constraint (if the user does not specify a column, the primary key is used).
Expand All @@ -659,37 +731,81 @@ def create_index(self, index_name, table_name, index_type='btree'):
table_name: string. Table name (must be part of database).
index_name: string. Name of the created index.
'''
'''
if self.tables[table_name].pk_idx is None: # if no primary key, no index
raise Exception('Cannot create index. Table has no primary key.')
'''

# If the column_name is None, then the index will be created on the PK of the table
if column_name is None: # if not column_name:
if self.tables[table_name].pk_idx is None:
# If the table doesn't have a Primary Key, then the index will not be created.
print('## ERROR: Cannot create index. Table has no primary key.')
raise Exception('## ERROR: Cannot create index. Table has no primary key.')
else:

# Check if the specified column has the UNIQUE constraint
# and if it is the primary key of the table
if column_name not in self.tables[table_name].columns_unique and column_name!=self.tables[table_name].pk: #if not unique
print("## ERROR: Cannot create index. Column does not have the unique constraint and it is non-pk.")
raise Exception('## ERROR: Cannot create index. Column does not have the unique constraint and it is non-pk.')

if index_name not in self.tables['meta_indexes'].column_by_name('index_name'):
# currently only btree is supported. This can be changed by adding another if.
# currently only btree is supported. This can be changed by adding another if.
if index_type=='btree':
logging.info('Creating Btree index.')
# insert a record with the name of the index and the table on which it's created to the meta_indexes table
self.tables['meta_indexes']._insert([table_name, index_name])
# crate the actual index
self._construct_index(table_name, index_name)
self.tables['meta_indexes']._insert([table_name, index_name, column_name, 'btree'])
# create the actual index
self._construct_index(table_name, index_name, column_name, 'btree') # added column_name for indexes with the unique constraint and index_type column for the two kinds of indexes supported
self.save_database()
elif index_type == 'hash':
logging.info('Creating Hash index.')
self.tables['meta_indexes']._insert([table_name, index_name, column_name, 'hash'])
self._construct_index_hash(table_name, index_name, column_name)
self.save_database()

else:
raise Exception('Cannot create index. Another index with the same name already exists.')
raise Exception('## ERROR: Cannot create index. Another index with the same name already exists.')

def _construct_index(self, table_name, index_name):
def _construct_index(self, table_name, index_name, column_name, index_type=None): # added column_name for indexes with the unique constraint and index_type column for the two kinds of indexes supported
'''
Construct a btree on a table and save.

Args:
table_name: string. Table name (must be part of database).
index_name: string. Name of the created index.
'''
bt = Btree(3) # 3 is arbitrary
if index_type == 'btree':
bt = Btree(3) # 3 is arbitrary
for idx, key in enumerate(self.tables[table_name].column_by_name(column_name)):
if key is None:
continue
bt.insert(key, idx)
self._save_index(index_name, bt)

print("Created index with B+Tree on column: " + column_name + ".")
# bt.insert(key, idx)
# save the btree
#self._save_index(index_name, bt)

def _construct_index_hash(self, table_name, index_name, column_name):
'''
Construct a hash index on a HashTable object that uses extendible hashing and save.

# for each record in the primary key of the table, insert its value and index to the btree
for idx, key in enumerate(self.tables[table_name].column_by_name(self.tables[table_name].pk)):
Args:
table_name: string. Table name (must be part of database).
index_name: string. Name of the created index.
column_name: string. Name of specified column with index.
'''
exthash = HashTable()
for idx, key in enumerate(self.tables[table_name].column_by_name(column_name)): # for each record in the table, insert it in the HashTable object.
if key is None:
continue
bt.insert(key, idx)
# save the btree
self._save_index(index_name, bt)
exthash.insert(key,idx)

self._save_index(index_name, exthash)
print("Created index with Hash-Index on column: " + column_name + ".")


def _has_index(self, table_name):
Expand Down
Loading