Skip to content
Snippets Groups Projects
Commit 02c81f2a authored by Eunhak Lee's avatar Eunhak Lee
Browse files

feat: json -> sql 생성

parent b694b970
Branches main
No related tags found
No related merge requests found
import os
import json
import pprint
from psycopg import sql
# Lookup table of tag ids, keyed by part type.
#
# Each part type maps to an ordered tuple of "tag groups" (inner tuples of ints).
# parse_single() turns every group into a set and, for each group in order,
# emits the value of the first entry in an item's "meta" mapping whose
# meta["key"] or meta["value"] (converted with safe_int) is in that group.
# The i-th group therefore produces the i-th extracted value of a row, which
# the script later pairs positionally (via zip) with COLUMNS_DICT[part][i].
#
# NOTE(review): the ids presumably come from the site the JSON dump was
# scraped from; their individual meanings are not visible in this file.
# NOTE(review): "hdd" lists 8 groups but COLUMNS_DICT["hdd"] has 7 columns, so
# zip() silently drops the value matched by the last hdd group - confirm which
# side is wrong.
TAGS_DICT = {
"cpu": (
(29815, 4410, 212881, 27507, 235752, 28348, 218151, 213070, 219777, 213073, 5273, 226503, 29814, 4405, 5271, 5270, 28289, 27508, 25090, 218145, 213076, 212884, 31315, 27909, 28286, 28346, 220365, 226497, 212878, 226500, 9005, 213238, 28931, 27517, 219783, 10198, 5274, 5276, 5272, 5246, 28347, 2981092, 8510, 28287, 5252, 4404, 219780, 5263, 231738, 5220, 227589, 5232, 218154, 5196, 234546, 27242, 5177, 213136, 27818, 216054, 23542, 5211, 216051, 213142, 5178, 27452, 232827, 200214, 27834, 212887, 5197, 10200, 231258, 213064, 213139, 4411, 216057, 5194, 214854, 213133, 213130, 214848, 214851,),
(10125, 28042, 4417, 2553, 2559, 235737, 2552, 225834, 23543, 8238, 212890, 27911, 8051, 212893, 219456, 2562, 219786, 2536, 2576, 27231, 27910, 213082, 25093, 217962),
(233178,),
(2625,),
(2626,),
(4461,),
(225849, 3458, 23556, 219798,),
(2635,),
),
"gpu": (
(9034, 2855, 226080, 2825,),
(210196, 31072, 26363, 10628, 10626, 227247, 226074, 10618, 29775, 226086, 218184, 31672, 13349, 26411, 10631, 27841, 214881, 23565, 216357, 2022, 10622),
# This group repeats a few ids (226206, 226203, 226170, 231984); harmless
# because the group is converted to a set before membership tests.
(
226206, 231984, 226203, 226170,
226206, 226203,
13448, 13441,
229128, 231981, 30022, 218187, 23012, 224775, 30235, 215997, 224778, 217956, 211600, 5840, 226170, 214908, 227655, 6307, 218607, 220710, 229938, 221193, 211606, 30232, 220782, 232059, 218604, 210568, 27837, 229434, 31069, 216759, 226875, 231672, 220779, 214911, 22672, 220941, 218925, 220707, 29832, 26663,
216363, 222294, 230481, 27848, 229452, 27842, 216360, 230484, 227304, 219699, 221532, 231984, 222297, 221529, 219084, 227307, 218163, 217221, 27849, 233355,
),
(5078, 28349, 216114, 5080, 5082, 214926, 23556,),
(2895,),
(13357, 234018, 222543, 222546, 222552, 13809, 214923, 222549, 2693, 2733, 222555,),
(9993,),
(2917,),
),
"ram": (
(3462, 3463, 3457,),
(25174,),
(3467,),
(3458, 23556, 3459, 219834,),
(3468,),
(6690, 6688, 6686, 6685,),
),
"mb": (
(217890, 6695, 6176,),
(10125, 2552, 27231, 27911, 213217, 219786, 235737, 25093, 8238, 28042, 2553, 225834, 27910, 10137, 226533,),
(227646, 227643, 29766, 235740, 25096, 29763, 27494, 8246, 2654, 226248, 217887, 11438, 219843, 213235, 29015, 8240, 27243, 25006, 10188, 220587, 25095, 213220, 29018, 8245, 13553, 226254, 23058, 213226, 213232, 10233, 29019, 25176, 10186, 213229, 211519, 211516, 213813, 226242, 2657, 226239, 6933, 220578, 226536, 25097, 214935, 27453, 27492, 10230, 28871, 220581, 229083, 2655, 31327, 29012, 23002, 217884, 10218, 10309, 235617, 31321, 217698, 10197, 28872, 27493, 217590,),
(29934,),
(1758, 23556, 219846, 26749, 1757,),
(2662,),
(4728,),
(232326, 2672, 2673, 13558, 2671, 2674,),
),
"ssd": (
(30535, 5342, 25995, 2928, 27269,),
(3028,),
(12950, 6841, 30532, 26434, 235974, 5464,),
(30217, 3727, 5433, 3701, 27281, 3726, 3059,),
(27282,),
(25742,),
),
# 8 groups here versus 7 columns in COLUMNS_DICT["hdd"] (see note above).
"hdd": (
(2921, 25509, 2920, 25633, 25300, 2919, 2979,),
(5332, 5333, 5331,),
(2975,),
(12952, 5461, 2927, 2926, 30535, 4568, 5342, 2928, 2930,),
(2977,),
(2976,),
(3031,),
(27421, 30643,),
)
}
# Destination column names per part type, in the order in which the extracted
# row values are paired with them (positionally, via zip) when the INSERT
# statements are generated.
COLUMNS_DICT = {
    "cpu": (
        "family_type",
        "socket_type",
        "core_count",
        "thread_count",
        "base_clock",
        "max_clock",
        "mem_type",
        "tdp",
    ),
    "gpu": (
        "chipset_manufacturer",
        "family_type",
        "chipset",
        "vram_type",
        "vram_size",
        "interface",
        "max_monitor_count",
        "power_consumption",
    ),
    "mb": (
        "board_type",
        "cpu_socket",
        "cpu_chipset",
        "power_phase",
        "ram_type",
        "ram_speed",
        "ram_slot_count",
        "form_factor",
    ),
    "ssd": (
        "interface",
        "size",
        "form_factor",
        "nand_type",
        "dram_type_size",
        "protocol",
    ),
    "hdd": (
        "usage_type",
        "disk_standard_size",
        "interface",
        "buffer_size",
        "rpm",
        "max_speed",
        "access_method",
    ),
    "ram": (
        "usage_type",
        "form_factor",
        "size",
        "generation",
        "base_clock",
        "package_count",
    ),
}
# Part type to process, read from the PART_TYPE environment variable when the
# module is loaded; a missing variable raises KeyError here.
PART_TYPE = os.environ["PART_TYPE"]
# Tag-id groups for the selected part type; raises KeyError when PART_TYPE is
# not one of the keys of TAGS_DICT.
target_tags = TAGS_DICT[PART_TYPE]
def safe_int(v):
    """Convert *v* to ``int``, or return ``None`` when it cannot be converted.

    Only conversion failures are suppressed: ``TypeError`` (e.g. ``None`` or a
    list), ``ValueError`` (e.g. a non-numeric string or NaN) and
    ``OverflowError`` (e.g. an infinite float). Anything else, including
    ``KeyboardInterrupt``, propagates instead of being silently swallowed.
    """
    try:
        return int(v)
    except (TypeError, ValueError, OverflowError):
        return None
def parse_single(item):
    """Extract one row of tagged values from a ``(key, value)`` dump entry.

    The returned list starts with the tuple ``(key, stripped name)`` followed
    by one element per group in the module-level ``target_tags``: the stripped
    ``"value"`` of the first entry in ``value["meta"]`` whose ``meta["key"]``
    or ``meta["value"]`` (converted with ``safe_int``) belongs to that group,
    or ``None`` when no entry matches. Items without a ``"meta"`` field get
    ``None`` for every group.
    """
    item_key, payload = item
    head = (item_key, payload["name"].strip())

    if "meta" not in payload:
        return [head] + [None] * len(target_tags)

    row = [head]
    entries = payload["meta"]
    for group in target_tags:
        wanted = set(group)
        for entry in entries.values():
            if "meta" not in entry:
                continue
            text = entry["value"].strip()
            tag_meta = entry["meta"]
            # "key" is tried before "value"; either one matching is enough.
            if any(
                field in tag_meta and safe_int(tag_meta[field]) in wanted
                for field in ("key", "value")
            ):
                row.append(text)
                break
        else:
            # No entry carried a tag id from this group.
            row.append(None)
    return row
if __name__ == "__main__":
    # Load the JSON dump for the selected part type; the context manager makes
    # sure the file handle is closed once parsing is done.
    with open("data/{}s-dump.json".format(PART_TYPE), encoding="utf-8") as dump_file:
        data = json.load(dump_file)

    rows = []
    tot_len = len(target_tags)
    matched = 0
    missed = 0
    for key, value in data.items():
        rows.append(parse_single((key, value,)))
        # rows[-1][0] is the (key, name) header; the remaining tot_len entries
        # are the extracted values, None meaning "no tag matched".
        nan = rows[-1][1:].count(None)
        missed += nan
        matched += tot_len - nan

    # Guard against an empty dump, which would otherwise divide by zero.
    total = matched + missed
    rate = 100 * matched / total if total else 0.0
    print("Match rate: {:,}/{:,} {:.4f}%".format(matched, total, rate))

    # One statement per part: insert into `parts`, then insert the extracted
    # attributes into the per-type table using the id returned by the first
    # insert.
    Q = sql.SQL("WITH rows AS (INSERT INTO parts (name, type, image_url) VALUES ({name}, {part_type}, {image_url}) RETURNING id) INSERT INTO {table} ({columns}) SELECT {} FROM rows;")
    with open(PART_TYPE + ".sql", "w", encoding="utf-8") as f:
        for row in rows:
            key, name = row[0]
            fields = row[1:]
            # `part_id` is filled from the `id` column of the CTE; the other
            # columns are added only when a non-empty value was extracted.
            columns = ["part_id"]
            values = [sql.Identifier("id")]
            for _col, _val in zip(COLUMNS_DICT[PART_TYPE], fields):
                if not _val:
                    continue
                columns.append(_col)
                values.append(_val)
            # Plain Python values handed to the psycopg sql helpers are
            # expected to be rendered as escaped SQL literals (see psycopg's
            # `sql` module documentation).
            print(Q.format(
                sql.SQL(", ").join(values),
                name=name,
                part_type=PART_TYPE.upper(),
                image_url="/static/imgs/product_{}.png".format(key),
                table=sql.Identifier("part_info_" + PART_TYPE),
                columns=sql.SQL(", ").join(sql.Identifier(x) for x in columns)
            ).as_string(), file=f)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment