#!/usr/bin/env python3
"""Write a PostgreSQL script that creates and populates a ``laptops`` table.

Each generated row picks one random option for every entry of ``properties``
and stores, next to the chosen labels, a one-hot embedding that spans all
options of all properties.  When run as a script, the SQL is written to
standard output.
"""
import sys
from random import random

# Option labels per property.  The INSERT statements list no column names, so
# values are matched to columns positionally: the key order here must follow
# the column order of the CREATE TABLE statement below.
properties = {
    'manufacturers': ['LG', 'HP', 'Lenovo', 'Samsung', 'Asus', 'Acer', 'Apple'],
    'cpus': ['x64', 'ARM', 'RISC-V'],
    'harddrive_types': ['SSD', 'HDD', 'SSD+HDD'],
    'harddrive_spaces': ['<= 128 GB', '128-256GB', '256-512GB', '512-1024GB', '1024+GB'],
    'ram_types': ['DDR3', 'DDR4'],
    'rams': ['1-2GB', '2-4GB', '4-8GB', '8-16GB', '16+GB'],
    'wifis': ['No Wifi', 'Wifi 2.4', 'Wifi 5.8'],
    'bluetooths': ['No Bluetooth', 'Has Bluetooth'],
    'ethernets': ['No Ethernet', 'Has Ethernet'],
    'webcams': ['No Webcam', 'Has Webcam'],
    'cardreaders': ['No Cardreader', 'Has Cardreader'],
    'graphics': ['Integrated GPU', 'External GPU', 'Integrated+External GPU'],
    'displays': ['13.3 inch', '15.6 inch', '16 inch'],
    'usbs': ['No USB', 'USB 2.0', 'USB 3.0'],
    'batteries': ['<= 4 Hours', '4-6 hours', '6-8 hours', '8-10 hours', '10+ hours'],
}

# Width of the one-hot embedding: one slot per option of every property.
vector_len = sum(len(options) for options in properties.values())

# Defaults used when the file is executed as a script.
ROW_COUNT = 1_000_000
BATCH_SIZE = 1000


def _sql_literal(text):
    """Return *text* as a single-quoted SQL string with quotes doubled."""
    return "'" + text.replace("'", "''") + "'"


def create_table_sql():
    """Return the CREATE TABLE statement for the ``laptops`` table.

    The ``embedding`` column is declared as ``VECTOR(vector_len)``.
    """
    # NOTE(review): the VECTOR type presumably comes from the pgvector
    # extension, which this script does not enable -- confirm it is installed
    # in the target database.
    return f"""CREATE TABLE IF NOT EXISTS laptops(
    id SERIAL PRIMARY KEY,
    manufacturer TEXT NOT NULL,
    cpu TEXT NOT NULL,
    harddrive_type TEXT NOT NULL,
    harddrive_space TEXT NOT NULL,
    ram_type TEXT NOT NULL,
    ram TEXT NOT NULL,
    wifi TEXT NOT NULL,
    bluetooth TEXT NOT NULL,
    ethernet TEXT NOT NULL,
    webcam TEXT NOT NULL,
    cardreader TEXT NOT NULL,
    graphics TEXT NOT NULL,
    display TEXT NOT NULL,
    usb TEXT NOT NULL,
    battery TEXT NOT NULL,
    embedding VECTOR({vector_len}) NOT NULL
);"""


def random_row(row_id):
    """Return one parenthesised VALUES tuple for a randomly configured laptop.

    Args:
        row_id: Integer written as the explicit ``id`` of the row.

    Returns:
        A string of the form ``(id,'label',...,'[0,1,...]')`` holding one
        label per property followed by the one-hot embedding literal.
    """
    fields = [str(row_id)]
    embedding = []
    for options in properties.values():
        # random() is in [0, 1), so the product truncates to a valid index.
        chosen = int(random() * len(options))
        fields.append(_sql_literal(options[chosen]))
        embedding.extend('1' if slot == chosen else '0'
                         for slot in range(len(options)))
    fields.append("'[" + ','.join(embedding) + "]'")
    return '(' + ','.join(fields) + ')'


def generate_sql(row_count=ROW_COUNT, batch_size=BATCH_SIZE):
    """Yield the CREATE TABLE statement, then batched INSERT statements.

    Args:
        row_count: Number of rows to generate; ids run from 1 to row_count.
        batch_size: Maximum number of rows per INSERT statement.

    Yields:
        Complete SQL statements, each terminated by ``;``.

    Raises:
        ValueError: If batch_size is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be at least 1")
    yield create_table_sql()
    # NOTE(review): ids are written explicitly, so the sequence behind the
    # SERIAL column is not advanced by these inserts -- confirm that no later
    # insert relies on the default id.
    for first_id in range(1, row_count + 1, batch_size):
        end_id = min(first_id + batch_size, row_count + 1)
        rows = ',\n'.join(random_row(row_id)
                          for row_id in range(first_id, end_id))
        yield 'INSERT INTO laptops VALUES' + rows + ';'


def main(out=None):
    """Write the full SQL script to *out* (standard output by default)."""
    stream = sys.stdout if out is None else out
    for statement in generate_sql():
        # One write per statement instead of one print per field.
        stream.write(statement)
        stream.write('\n')


if __name__ == '__main__':
    main()