"""tag_mapper.py: turn a scraped part dump into SQL INSERT statements.

The part type is taken from the ``PART_TYPE`` environment variable.  For that
type the script reads ``data/<PART_TYPE>s-dump.json``, looks up one spec value
per tag-id group of ``TAGS_DICT[PART_TYPE]`` and writes one INSERT statement
per part to ``<PART_TYPE>.sql``.
"""
import json
import os
import pprint  # not used below; kept because other tooling may rely on it

from psycopg import sql

# For every part type: a tuple of tag-id groups.  The i-th group is matched
# against each part's spec entries, and the value found is stored in the
# i-th column of COLUMNS_DICT for the same part type.
TAGS_DICT = {
    "cpu": (
        (29815, 4410, 212881, 27507, 235752, 28348, 218151, 213070, 219777, 213073, 5273, 226503, 29814, 4405, 5271, 5270, 28289, 27508, 25090, 218145, 213076, 212884, 31315, 27909, 28286, 28346, 220365, 226497, 212878, 226500, 9005, 213238, 28931, 27517, 219783, 10198, 5274, 5276, 5272, 5246, 28347, 2981092, 8510, 28287, 5252, 4404, 219780, 5263, 231738, 5220, 227589, 5232, 218154, 5196, 234546, 27242, 5177, 213136, 27818, 216054, 23542, 5211, 216051, 213142, 5178, 27452, 232827, 200214, 27834, 212887, 5197, 10200, 231258, 213064, 213139, 4411, 216057, 5194, 214854, 213133, 213130, 214848, 214851,),
        (10125, 28042, 4417, 2553, 2559, 235737, 2552, 225834, 23543, 8238, 212890, 27911, 8051, 212893, 219456, 2562, 219786, 2536, 2576, 27231, 27910, 213082, 25093, 217962),
        (233178,),
        (2625,),
        (2626,),
        (4461,),
        (225849, 3458, 23556, 219798,),
        (2635,),
    ),
    "gpu": (
        (9034, 2855, 226080, 2825,),
        (210196, 31072, 26363, 10628, 10626, 227247, 226074, 10618, 29775, 226086, 218184, 31672, 13349, 26411, 10631, 27841, 214881, 23565, 216357, 2022, 10622),
        (
            # Some ids are listed more than once; harmless, since each group
            # is only used for membership tests.
            226206, 231984, 226203, 226170,
            226206, 226203,
            13448, 13441,
            229128, 231981, 30022, 218187, 23012, 224775, 30235, 215997, 224778, 217956, 211600, 5840, 226170, 214908, 227655, 6307, 218607, 220710, 229938, 221193, 211606, 30232, 220782, 232059, 218604, 210568, 27837, 229434, 31069, 216759, 226875, 231672, 220779, 214911, 22672, 220941, 218925, 220707, 29832, 26663,
            216363, 222294, 230481, 27848, 229452, 27842, 216360, 230484, 227304, 219699, 221532, 231984, 222297, 221529, 219084, 227307, 218163, 217221, 27849, 233355,
        ),
        (5078, 28349, 216114, 5080, 5082, 214926, 23556,),
        (2895,),
        (13357, 234018, 222543, 222546, 222552, 13809, 214923, 222549, 2693, 2733, 222555,),
        (9993,),
        (2917,),
    ),
    "ram": (
        (3462, 3463, 3457,),
        (25174,),
        (3467,),
        (3458, 23556, 3459, 219834,),
        (3468,),
        (6690, 6688, 6686, 6685,),
    ),
    "mb": (
        (217890, 6695, 6176,),
        (10125, 2552, 27231, 27911, 213217, 219786, 235737, 25093, 8238, 28042, 2553, 225834, 27910, 10137, 226533,),
        (227646, 227643, 29766, 235740, 25096, 29763, 27494, 8246, 2654, 226248, 217887, 11438, 219843, 213235, 29015, 8240, 27243, 25006, 10188, 220587, 25095, 213220, 29018, 8245, 13553, 226254, 23058, 213226, 213232, 10233, 29019, 25176, 10186, 213229, 211519, 211516, 213813, 226242, 2657, 226239, 6933, 220578, 226536, 25097, 214935, 27453, 27492, 10230, 28871, 220581, 229083, 2655, 31327, 29012, 23002, 217884, 10218, 10309, 235617, 31321, 217698, 10197, 28872, 27493, 217590,),
        (29934,),
        (1758, 23556, 219846, 26749, 1757,),
        (2662,),
        (4728,),
        (232326, 2672, 2673, 13558, 2671, 2674,),
    ),
    "ssd": (
        (30535, 5342, 25995, 2928, 27269,),
        (3028,),
        (12950, 6841, 30532, 26434, 235974, 5464,),
        (30217, 3727, 5433, 3701, 27281, 3726, 3059,),
        (27282,),
        (25742,),
    ),
    "hdd": (
        (2921, 25509, 2920, 25633, 25300, 2919, 2979,),
        (5332, 5333, 5331,),
        (2975,),
        (12952, 5461, 2927, 2926, 30535, 4568, 5342, 2928, 2930,),
        (2977,),
        (2976,),
        (3031,),
        (27421, 30643,),
    )
}

# For every part type: the column names of its ``part_info_<type>`` table, in
# the same order as the tag-id groups of TAGS_DICT.
COLUMNS_DICT = {
    "cpu": ('family_type', 'socket_type', 'core_count', 'thread_count', 'base_clock', 'max_clock', 'mem_type', 'tdp',),
    "gpu": ('chipset_manufacturer', 'family_type', 'chipset', 'vram_type', 'vram_size', 'interface', 'max_monitor_count', 'power_consumption',),
    "mb": ('board_type', 'cpu_socket', 'cpu_chipset', 'power_phase', 'ram_type', 'ram_speed', 'ram_slot_count', 'form_factor',),
    "ssd": ('interface', 'size', 'form_factor', 'nand_type', 'dram_type_size', 'protocol',),
    # NOTE(review): TAGS_DICT["hdd"] has 8 tag-id groups but only 7 columns
    # are listed here, so the value matched by the last group (27421, 30643)
    # is never written to the SQL output -- confirm whether a column is
    # missing or the group is superfluous.
    "hdd": ('usage_type', 'disk_standard_size', 'interface', 'buffer_size', 'rpm', 'max_speed', 'access_method',),
    "ram": ('usage_type', 'form_factor', 'size', 'generation', 'base_clock', 'package_count',),
}

# Selected at import time; a missing variable raises KeyError, an unknown part
# type raises KeyError on the TAGS_DICT lookup.
PART_TYPE = os.environ["PART_TYPE"]
target_tags = TAGS_DICT[PART_TYPE]

# Membership sets built once instead of once per part and tag group.
_TARGET_TAG_SETS = tuple(frozenset(tags) for tags in target_tags)


def safe_int(v):
    """Return ``int(v)``, or ``None`` when *v* cannot be converted."""
    try:
        return int(v)
    except (TypeError, ValueError, OverflowError):
        # Only conversion failures are swallowed; KeyboardInterrupt and
        # SystemExit are no longer caught as they were by a bare ``except``.
        return None


def _find_spec_value(specs, tag_set):
    """Return the stripped value of the first spec entry tagged with an id in *tag_set*.

    Entries without a ``"meta"`` member are skipped.  An entry matches when
    the ``"key"`` or the ``"value"`` member of its ``"meta"`` converts to an
    integer contained in *tag_set*.  Returns ``None`` when nothing matches.
    """
    for entry in specs.values():
        if "meta" not in entry:
            continue

        text = entry["value"].strip()
        ids = entry["meta"]
        for field in ("key", "value"):
            if field in ids and safe_int(ids[field]) in tag_set:
                return text
    return None


def parse_single(item, tag_sets=None):
    """Build one output row for a ``(key, value)`` pair of the dump.

    Args:
        item: ``(key, value)`` where *value* is a mapping with a ``"name"``
            string and, optionally, a ``"meta"`` mapping of spec entries.
        tag_sets: iterable of tag-id collections to look up, one per output
            column.  Defaults to the groups selected by ``PART_TYPE``.

    Returns:
        A list whose first element is ``(key, stripped name)`` followed by
        one entry per tag-id group: the matched spec value, or ``None`` when
        the part has no ``"meta"`` or no entry matched that group.
    """
    if tag_sets is None:
        tag_sets = _TARGET_TAG_SETS

    part_key, value = item
    row = [(part_key, value["name"].strip())]

    if "meta" not in value:
        row.extend(None for _ in tag_sets)
        return row

    specs = value["meta"]
    row.extend(_find_spec_value(specs, tag_set) for tag_set in tag_sets)
    return row


def main():
    """Read the dump for ``PART_TYPE``, report the match rate, write the SQL file."""
    with open("data/{}s-dump.json".format(PART_TYPE), encoding="utf-8") as dump_file:
        dump = json.load(dump_file)

    rows = [parse_single(entry) for entry in dump.items()]

    # Every row holds exactly len(target_tags) spec slots after the leading
    # (key, name) tuple, so matched = slots - missed.  (The previous
    # ``tot_len - 1 - nan`` undercounted by one per row.)
    slots_per_row = len(target_tags)
    missed = sum(row[1:].count(None) for row in rows)
    matched = slots_per_row * len(rows) - missed
    total = matched + missed
    rate = 100 * matched / total if total else 0.0  # empty dump: avoid dividing by zero

    print("Match rate: {:,}/{:,} {:.4f}%".format(matched, total, rate))

    Q = sql.SQL("WITH rows AS (INSERT INTO parts (name, type, image_url) VALUES ({name}, {part_type}, {image_url}) RETURNING id) INSERT INTO {table} ({columns}) SELECT {} FROM rows;")

    column_names = COLUMNS_DICT[PART_TYPE]
    table = sql.Identifier("part_info_" + PART_TYPE)

    with open(PART_TYPE + ".sql", "w", encoding="utf-8") as f:
        for row in rows:
            part_key, name = row[0]

            # The new part's id (returned by the CTE) always fills part_id.
            columns = ["part_id"]
            values = [sql.Identifier("id")]
            for column, spec in zip(column_names, row[1:]):
                if not spec:
                    # Unmatched (None) and empty values are left out of the INSERT.
                    continue

                columns.append(column)
                values.append(sql.Literal(spec))

            statement = Q.format(
                sql.SQL(", ").join(values),
                name=name,
                part_type=PART_TYPE.upper(),
                image_url="/static/imgs/product_{}.png".format(part_key),
                table=table,
                columns=sql.SQL(", ").join(sql.Identifier(x) for x in columns)
            )
            print(statement.as_string(), file=f)


if __name__ == "__main__":
    main()