Skip to content
Snippets Groups Projects
Verified Commit b229065e authored by Eunhak Lee's avatar Eunhak Lee
Browse files

feat: hwinfo grabber 의 데이터를 처리함

parents
Branches
No related tags found
No related merge requests found
.DS_Store
/venv/
__pycache__
*.html
*.json
*.xml
*.zip
*.txt
!/requirements.txt
.env*
!/.env.example
parse.py 0 → 100644
import os
import re
import xml.etree.ElementTree as ET
def parse_single(body):
    """Parse one hwinfo-grabber XML document into ``{part_type: [{tag: text}]}``.

    The grabber output is not always well-formed XML: bare ampersands and
    stray ``<>`` sequences can appear inside hardware names, so the body is
    sanitized before it is handed to ElementTree.
    """
    stripped = body
    # BUG FIX: the original re.sub(r"&", "&", ...) was a no-op (a scrape of the
    # intended "&amp;" escape). Escape bare ampersands — those not already
    # starting an XML entity — so ET.fromstring can parse the document.
    stripped = re.sub(r"&(?!amp;|lt;|gt;|quot;|apos;|#\d+;)", "&amp;", stripped)
    stripped = re.sub(r"<>", "", stripped)
    doc = ET.fromstring(stripped)
    parsed = {}
    for search in doc.findall("search"):
        query = search.find("query")
        # The query looks like "SELECT * FROM Win32_Processor"; the part type
        # is whatever follows " win32_" (e.g. "processor").
        part_type = query.text.lower().split(' win32_')[-1]
        if part_type not in parsed:
            parsed[part_type] = []
        for part in search.find("parts"):
            _part = {}
            for meta in part:
                _part[meta.tag] = meta.text
            parsed[part_type].append(_part)
    return parsed
def extract_tag(_tree, candidates):
    """Return the first non-None, stripped value in ``_tree`` for any key in ``candidates``.

    Returns None when no candidate key holds a usable value.
    """
    for key in candidates:
        # BUG FIX: ElementTree leaves .text as None for empty tags, so the
        # stored value may be None; the original crashed on None.strip().
        if key in _tree and _tree[key] is not None:
            return _tree[key].strip()
    return None
def failsafe(func, value):
    """Apply ``func`` to ``value``; on failure return ``value`` unchanged.

    Used to attempt conversions like int() on scraped strings without crashing.
    """
    try:
        return func(value)
    # BUG FIX: bare `except:` also swallowed KeyboardInterrupt/SystemExit.
    except Exception:
        return value
def readable(value, base=1000):
    """Render a byte count as a truncated human-readable string, e.g. ``"16 GB"``."""
    units = ("B", "KB", "MB", "GB", "TB")
    magnitude = value // 1  # floors a float input; leaves ints untouched
    unit_index = 0
    # Divide down until the magnitude fits under one `base`, or units run out.
    while unit_index < len(units) - 1 and magnitude >= base:
        magnitude /= base
        unit_index += 1
    return f"{int(magnitude)} {units[unit_index]}"
def parse_cpu(_tree):
    """Map a Win32_Processor record dict onto the CPU field schema."""
    l2 = failsafe(int, extract_tag(_tree, ("L2CacheSize",)))
    l3 = failsafe(int, extract_tag(_tree, ("L3CacheSize",)))
    # Cache sizes come back in KB; render them human-readable when numeric.
    if isinstance(l2, int):
        l2 = readable(l2, base=1024)
    if isinstance(l3, int):
        l3 = readable(l3, base=1024)
    return {
        "name": extract_tag(_tree, ("Name",)),
        "socket_type": extract_tag(_tree, ("SocketDesignation",)),
        "core_count": failsafe(int, extract_tag(_tree, ("NumberOfCores",))),
        "thread_count": failsafe(int, extract_tag(_tree, ("ThreadCount",))),
        "max_clock": failsafe(int, extract_tag(_tree, ("MaxClockSpeed",))),
        "l2_size": l2,
        "l3_size": l3,
    }
def parser_mem(_tree):
    """Map a Win32_PhysicalMemory record dict onto the RAM field schema."""
    name = extract_tag(_tree, ("PartNumber",))
    size = failsafe(int, extract_tag(_tree, ("Capacity",)))
    base_clock = failsafe(int, extract_tag(_tree, ("ConfiguredClockSpeed",)))
    manufacturer = extract_tag(_tree, ("Manufacturer",))
    serial = extract_tag(_tree, ("SerialNumber",))
    form_factor = extract_tag(_tree, ("DeviceLocator",))
    # FIX: removed the dead local `generation` (SMBIOSMemoryType/MemoryType);
    # it was computed and never used nor returned.
    # Collapse slot labels such as "ChannelA-DIMM0" to the bare form factor.
    if form_factor and "DIMM" in form_factor:
        form_factor = "DIMM"
    if isinstance(size, int):
        size = readable(size, base=1024)
    return {
        "name": name,
        "size": size,
        "base_clock": base_clock,
        "manufacturer": manufacturer,
        "serial": serial,
        "form_factor": form_factor,
    }
def parser_mb(_tree):
    """Map a Win32_BaseBoard record dict onto the motherboard field schema."""
    return {
        "name": extract_tag(_tree, ("Product", "Name",)),
        "manufacturer": extract_tag(_tree, ("Manufacturer",)),
        "serial": extract_tag(_tree, ("SerialNumber",)),
    }
def parser_disk(_tree):
    """Map a Win32_DiskDrive record dict onto the disk field schema."""
    capacity = failsafe(int, extract_tag(_tree, ("Size",)))
    if isinstance(capacity, int):
        # Drive vendors advertise decimal units, hence base 1000 here.
        capacity = readable(capacity, base=1000)
    return {
        "name": extract_tag(_tree, ("Model", "Caption",)),
        "capacity": capacity,
        "interface": extract_tag(_tree, ("InterfaceType",)),
        "serial": extract_tag(_tree, ("SerialNumber",)),
    }
def parser_gpu(_tree):
    """Map a Win32_VideoController record dict onto the GPU field schema."""
    name = extract_tag(_tree, ("Name", "VideoProcessor", "Caption",))
    manufacturer = extract_tag(_tree, ("AdapterCompatibility",))
    vram_size = failsafe(int, extract_tag(_tree, ("AdapterRAM",)))
    # NOTE(review): -1048576 is treated as "no usable VRAM figure" — presumably
    # a wrapped 32-bit WMI AdapterRAM value; confirm against the grabber.
    if vram_size == -1048576:
        vram_size = None
    if isinstance(vram_size, int):
        vram_size = readable(vram_size, base=1024)
    # BUG FIX: `name` can be None (extract_tag returns None when absent);
    # the original raised TypeError on `"GB" not in None`.
    if vram_size and name and "GB" not in name:
        name += f" ({vram_size})"
    return {
        "name": name,
        "manufacturer": manufacturer,
        "vram_size": vram_size,
    }
def filter_virtual(_values):
    """Keep only part dicts that have a truthy "name" and no virtual/remote/integrated marker.

    Every value of each dict is lower-cased and scanned for blacklist tokens
    (Korean and English), so a match in any field disqualifies the entry.
    """
    blacklist = (
        "가상", "virtual", "parsec", "remote", "intel(r) iris(r)", "amd radeon(tm) graphics", "intel(r) uhd graphics", "기본", "default",
    )
    kept = []
    for entry in _values:
        if not entry.get("name"):
            continue
        if any(token in str(field).lower() for field in entry.values() for token in blacklist):
            continue
        kept.append(entry)
    return kept
def handle_file(filename):
    """Read a grabber XML dump from disk and parse it into categorized part lists."""
    with open(filename, encoding="utf-8") as fp:
        body = fp.read()
    return handle_xml_body(body)
def handle_xml_body(data):
    """Parse raw grabber XML into ``{"CPU"|"MB"|"RAM"|"DISK"|"GPU": [part dicts]}``.

    Virtual/remote/integrated devices are filtered out of every category.
    Raises KeyError if an expected Win32 class is missing from the document.
    """
    tree = parse_single(data)
    cpus = filter_virtual([parse_cpu(record) for record in tree['processor']])
    disks = filter_virtual([parser_disk(record) for record in tree['diskdrive']])
    # Only the first baseboard entry is considered.
    boards = filter_virtual([parser_mb(tree['baseboard'][0])])
    memories = filter_virtual([parser_mem(record) for record in tree['physicalmemory']])
    gpus = filter_virtual([parser_gpu(record) for record in tree['videocontroller']])
    return {
        "CPU": cpus,
        "MB": boards,
        "RAM": memories,
        "DISK": disks,
        "GPU": gpus,
    }
if __name__ == "__main__":
    # Parse every grabber dump in the working directory and print the results.
    files = [name for name in os.listdir() if name.startswith("meanspec-log")]
    for fname in files:
        # BUG FIX: the original called undefined handle_xml(); handle_file()
        # is the file-reading entry point defined above.
        print(fname, handle_file(fname))
# Load .env first so the os.getenv calls in the main block see its values.
import dotenv
dotenv.load_dotenv(dotenv_path=".env", override=False)
import os
import re
import sys
import time
import parse  # local parse.py above: XML → part dicts
import redis
import psycopg2
import traceback
from datetime import datetime, timedelta
def conn():
    """Open a fresh PostgreSQL connection.

    SECURITY FIX: credentials are now read from the environment; the original
    hard-coded literals remain only as fallbacks so existing deployments keep
    working. NOTE(review): the fallback password should be rotated and removed.
    """
    return psycopg2.connect(
        host=os.getenv("DB_HOST", "hub.enak.kr"),
        user=os.getenv("DB_USER", "meanspec"),
        password=os.getenv("DB_PASS", "foopassword123"),
        dbname=os.getenv("DB_NAME", "meanspec"),
    )
def check_similarity(_conn, name, part_type):
    """Return the single best pg_trgm match for ``name`` among parts, or None.

    Rows are (id, name, similarity). ``part_type`` (e.g. 'CPU') restricts the
    search; RAM requires an exact similarity of 1.0 because memory part
    numbers barely differ.
    """
    _cursor = _conn.cursor()
    _auto_threshold = 1.0 if part_type == 'RAM' else 0.13
    # SECURITY FIX: the type filter was interpolated with str.format; it is now
    # a bound parameter like everything else.
    if part_type:
        sql = ("WITH comp AS (SELECT id, name, similarity(name::text, %s::text) as sim FROM parts WHERE type = %s) "
               "SELECT * FROM comp WHERE sim >=%s ORDER BY sim DESC LIMIT 5;")
        params = (name, part_type.upper(), _auto_threshold)
    else:
        sql = ("WITH comp AS (SELECT id, name, similarity(name::text, %s::text) as sim FROM parts ) "
               "SELECT * FROM comp WHERE sim >=%s ORDER BY sim DESC LIMIT 5;")
        params = (name, _auto_threshold)
    _cursor.execute(sql, params)
    data = _cursor.fetchall()
    # Only the single best row is of interest to callers.
    return data[:1] if data else None
def handle_single_part(_dict, part_type):
    """Resolve one parsed part dict to an existing DB row ``(id, name)`` or None.

    Reads the module-global ``db_conn``. Disks are never matched.
    """
    assert "name" in _dict
    assert bool(part_type)
    # Disk entries are intentionally skipped (see the DISK skip in func()).
    if part_type == 'DISK':
        return None
    name = _dict["name"]
    if part_type == 'GPU':
        # Strip vendor branding so similarity keys on the model string itself.
        name = name.replace("GeForce", "").replace("NVIDIA", "")
    candidates = check_similarity(db_conn, name, part_type)
    if candidates:
        # Drop the trailing similarity column from the matched row.
        return candidates[0][:-1]
    return None
def create_new_one(_dict, part_type, _cursor):
    """Insert a new part row plus its type-specific detail row; return the new part id.

    ``_dict`` is one parser output dict (must contain "name"); ``part_type``
    selects the ``part_info_<TYPE>`` detail table. The caller owns the cursor
    and the commit.
    """
    _dict = _dict.copy()
    name = _dict.pop('name')
    _part_type_enum = part_type.upper()
    # Serial numbers identify a physical unit, not a model, so they are
    # excluded from the shared parts catalog.
    if "serial" in _dict:
        _ = _dict.pop('serial')
    _obj = list(_dict.items())
    # NOTE(review): column names and the table suffix are interpolated into
    # the SQL text; they come from the parsers' fixed key sets, not user
    # input — confirm before reusing this with external data.
    columns = ", ".join([f'"{key}"' for key, _ in _obj])
    values = [value for _, value in _obj]
    sql = """WITH rows AS (INSERT INTO parts (name, type) VALUES (%s, %s) RETURNING id)
INSERT INTO part_info_{type} (part_id, {columns}) (SELECT rows.id, {value_entries} FROM rows) RETURNING rows.id;""".format(type=_part_type_enum, columns=columns, value_entries=", ".join(["%s"] * len(values)))
    _cursor.execute(sql, (name, _part_type_enum, *values))
    data = _cursor.fetchall()
    if data:
        return data[0][0]
    return None
def get_user_by_email(_cursor, _email):
    """Return the id of the user with ``_email``, or None when absent."""
    _cursor.execute("SELECT id FROM users WHERE email = %s;", (_email,))
    rows = _cursor.fetchall()
    return rows[0][0] if rows else None
def create_user(_cursor, _nick, _email):
    """Insert a placeholder user account and return its new id (None on empty result).

    The stored password is an unmatchable literal, so the account cannot log in.
    """
    _cursor.execute("INSERT INTO users (email, nickname, password) VALUES (%s, %s, 'never matching password lol') RETURNING id;", (_email, _nick,))
    rows = _cursor.fetchall()
    return rows[0][0] if rows else None
def safe_get_user(_cursor, _nick, _email):
    """Return the id of the user with ``_email``, creating the account when absent."""
    # BUG FIX: get_user_by_email was called without the cursor argument,
    # which raised TypeError on every call.
    _existing_user_id = get_user_by_email(_cursor, _email)
    if _existing_user_id:
        return _existing_user_id
    return create_user(_cursor, _nick, _email)
def create_combination(_cursor, nickname, _user_id):
    """Create a combination (a PC build) owned by ``_user_id``; return its id or None."""
    if nickname:
        # "<nickname>의 PC" — "<nickname>'s PC"
        _pc_name = str(nickname) + "의 PC"
    else:
        _pc_name = "새 PC"  # "new PC"
    _cursor.execute("INSERT INTO combinations (owner_id, name) VALUES (%s, %s) RETURNING id;", (_user_id, _pc_name,))
    rows = _cursor.fetchall()
    return rows[0][0] if rows else None
def wire_part_to_combination(_cursor, _combination_id, *_part_ids):
    """Link each part id to the combination; return rows of ``(relation id, part_id)``."""
    # BUG FIX: zero part ids produced "VALUES RETURNING ..." — invalid SQL.
    if not _part_ids:
        return []
    values = []
    for _part_id in _part_ids:
        values.append(_combination_id)
        values.append(_part_id)
    placeholders = ", ".join(["(%s, %s)" for _ in _part_ids])
    # FIX: a space was missing before RETURNING in the concatenated SQL.
    _cursor.execute("INSERT INTO relations (combination_id, part_id) VALUES " + placeholders + " RETURNING id, part_id", values)
    return _cursor.fetchall()
def match_part_obj_into_db(_part, part_type):
    """Return the DB id of the best-matching existing part for ``_part``, or None."""
    matched = handle_single_part(_part, part_type)
    return matched[0] if matched else None
def func():
    """Pop one pending submission code from Redis and persist its parts to PostgreSQL.

    Reads the module globals ``R`` (Redis client) and ``db_conn`` (PostgreSQL
    connection). Returns silently when the queue is empty. Raises
    AssertionError if the companion keys for a queued code are missing.
    Calls exit(1) if the combination row cannot be created.
    """
    value = R.lpop("mypc:queue")
    if value is None:
        return
    code = value.decode()
    print("Processing code:", code)
    # Companion keys written alongside the queued code by the submitting side.
    _user_id = R.get(f"mypc:code:{code}:user_id")
    _transaction_id = R.get(f"mypc:code:{code}:transaction_id")
    _xml_document = R.get(f"mypc:code:{code}:document")
    assert _user_id is not None, "userId must not be null"
    assert _transaction_id is not None, "transactionId must not be null"
    assert _xml_document is not None, "document must not be null"
    _user_id = int(_user_id.decode())
    _transaction_id = _transaction_id.decode()
    _xml_document = _xml_document.decode()
    print("User ID:", _user_id)
    print("Transaction ID:", _transaction_id)
    print("XML starts with:", repr(_xml_document[:50]))
    _parts = parse.handle_xml_body(_xml_document)
    _part_ids = []
    print("Matching parts")
    for part_type, parts in _parts.items():  # iterate per part category
        if part_type == "DISK":
            # Disks are per-unit hardware; they are not stored in the catalog.
            continue
        for part in parts:  # iterate per part
            _matched_id = match_part_obj_into_db(part, part_type)
            if not _matched_id:  # the part is not in the DB yet
                print(".. creating new part with", part["name"])
                with db_conn.cursor() as cursor:
                    _matched_id = create_new_one(part, part_type, cursor)
                db_conn.commit()
            if _matched_id:
                _part_ids.append(_matched_id,)
                continue
            print(f"[WARNING] failed to match part type={part_type}, part:", part, file=sys.stderr)
    print("Creating combination")
    with db_conn.cursor() as cursor:
        _combination_id = create_combination(cursor, None, _user_id)
    if not _combination_id:
        print("Failed to create combination", file=sys.stderr)
        exit(1)
    print("Wiring parts", _part_ids, "to combination id", _combination_id)
    with db_conn.cursor() as cursor:
        wire_part_to_combination(cursor, _combination_id, *_part_ids)
    print("Committing to DB")
    db_conn.commit()
    print("Removing anchor key from Redis")
    R.delete(f"mypc:code:{code}:document")
if __name__ == "__main__":
    # Daemon entry point: connect to Redis and PostgreSQL, then poll the queue
    # on a fixed interval until interrupted.
    _first_run = True
    _redis_url = os.getenv("REDIS_URL", "redis://localhost:6379/")
    print("Connecting Redis to", _redis_url)
    global R
    R = redis.Redis.from_url(_redis_url)
    try:
        _ = R.keys()  # cheap round-trip to verify the connection works
        print("Successfully connected Redis.")
    # BUG FIX: bare `except:` also swallowed KeyboardInterrupt/SystemExit.
    except Exception:
        print("Failed to connect Redis.", traceback.format_exc(), file=sys.stderr)
        exit(1)
    _db_host = os.getenv("DB_HOST", "localhost")
    _db_port = int(os.getenv("DB_PORT", "5432"))
    _db_name = os.getenv("DB_NAME", "meanspec")
    _db_user = os.getenv("DB_USER", "meanspec")
    _db_pass = os.getenv("DB_PASS", "foopassword123")
    print("Connecting PostgreSQL to", _db_host, _db_port, _db_name, _db_user)
    global db_conn
    try:
        db_conn = psycopg2.connect(host=_db_host, port=_db_port, dbname=_db_name, user=_db_user, password=_db_pass)
    except Exception:
        print("Failed to connect PostgreSQL.", traceback.format_exc(), file=sys.stderr)
        exit(1)
    _next_run_delta = timedelta(seconds=int(os.getenv("PARSE_DAEMON_REFRESH_INTERVAL", 5)))
    print("Check Redis as interval", _next_run_delta)
    # Align the schedule to whole seconds.
    last_run = datetime.now()
    last_run -= timedelta(microseconds=last_run.microsecond)
    next_run = last_run
    try:
        while True:
            if not _first_run and datetime.now() <= next_run:
                # BUG FIX: clamp at 0 — the remaining time can go negative
                # between the check above and this call, and time.sleep()
                # raises ValueError on negative input.
                time.sleep(max(0.0, min(0.1, (next_run - datetime.now()).total_seconds())))
                continue
            _first_run = False
            next_run += _next_run_delta
            func()
    except KeyboardInterrupt:
        exit(0)
    # FIX: removed the pointless `except Exception as e: raise e`; unhandled
    # exceptions propagate naturally and still trigger the cleanup below.
    finally:
        print("Closing connections")
        R.close()
        db_conn.close()
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment