From 41efdb4d779017ee9b6ae8af6ed07d59f622ff66 Mon Sep 17 00:00:00 2001
From: Seok Won <ikr@kakao.com>
Date: Sun, 20 Dec 2020 22:20:25 +0900
Subject: [PATCH] Update AjouNoticesParser

requests cause SSL Error.

For now, we change it to urlopen with an unverified SSL context
---
 python/src/AjouSlackProducer.py      | 16 ++++++++-------
 python/src/AjouSlackProducerMySQL.py | 16 ++++++++-------
 python/tests/test_parser.py          | 29 ++++++++++++++++------------
 3 files changed, 35 insertions(+), 26 deletions(-)

diff --git a/python/src/AjouSlackProducer.py b/python/src/AjouSlackProducer.py
index 665e753..f389f2b 100644
--- a/python/src/AjouSlackProducer.py
+++ b/python/src/AjouSlackProducer.py
@@ -3,8 +3,10 @@ import json
 import os
+import ssl
 import time
 from pathlib import Path
+from urllib.error import HTTPError
+from urllib.request import urlopen
 
-import requests
 from bs4 import BeautifulSoup
 from config import Config
 from confluent_kafka import Producer
@@ -199,16 +200,16 @@ class AjouParserJSON:
 
     # Ajou notices parser
     def parser(self):
+        context = ssl._create_unverified_context()
         try:
-            req = requests.get(
-                f"{self.ADDRESS}?mode=list&&articleLimit=10&article.offset=0"
+            result = urlopen(
+                f"{self.ADDRESS}?mode=list&&articleLimit={self.LENGTH}&article.offset=0",
+                context=context,
             )
-            req.raise_for_status()
-        except requests.exceptions.ConnectionError:
+        except HTTPError:
             print("Seems like the server is down now.")
             return None, None, None, None
-        req.encoding = "utf-8"
-        html = req.text
+        html = result.read()
         soup = BeautifulSoup(html, "html.parser")
         ids = soup.select("table > tbody > tr > td.b-num-box")
         posts = soup.select("table > tbody > tr > td.b-td-left > div > a")
diff --git a/python/src/AjouSlackProducerMySQL.py b/python/src/AjouSlackProducerMySQL.py
index 6f7a547..1ed9fbf 100644
--- a/python/src/AjouSlackProducerMySQL.py
+++ b/python/src/AjouSlackProducerMySQL.py
@@ -1,10 +1,12 @@
 import datetime
 import json
 import os
+import ssl
 import time
+from urllib.error import HTTPError
+from urllib.request import urlopen
 
 import mysql.connector
-import requests
 from bs4 import BeautifulSoup
 from config import Config
 from confluent_kafka import Producer
@@ -187,16 +189,16 @@ class AjouParser:
 
     # Ajou notices parser
     def parser(self):
+        context = ssl._create_unverified_context()
         try:
-            req = requests.get(
-                f"{self.ADDRESS}?mode=list&&articleLimit=10&article.offset=0"
+            result = urlopen(
+                f"{self.ADDRESS}?mode=list&&articleLimit={self.LENGTH}&article.offset=0",
+                context=context,
             )
-            req.raise_for_status()
-        except requests.exceptions.ConnectionError:
+        except HTTPError:
             print("Seems like the server is down now.")
             return None, None, None, None
-        req.encoding = "utf-8"
-        html = req.text
+        html = result.read()
         soup = BeautifulSoup(html, "html.parser")
         ids = soup.select("table > tbody > tr > td.b-num-box")
         posts = soup.select("table > tbody > tr > td.b-td-left > div > a")
diff --git a/python/tests/test_parser.py b/python/tests/test_parser.py
index 92f757d..c08cc55 100644
--- a/python/tests/test_parser.py
+++ b/python/tests/test_parser.py
@@ -1,6 +1,7 @@
 import json
+import ssl
 
-import requests
+from urllib.request import urlopen
 from bs4 import BeautifulSoup
 
 
@@ -24,9 +25,13 @@ def makeJson(postId, postTitle, postDate, postLink, postWriter):
 
 
 def parser():
-    req = requests.get(f"{ADDRESS}?mode=list&&articleLimit={LENGTH}&article.offset=0")
-    req.encoding = "utf-8"
-    html = req.text
+    # req = requests.get(f"{ADDRESS}?mode=list&&articleLimit={LENGTH}&article.offset=0")
+    context = ssl._create_unverified_context()
+    result = urlopen(
+        f"{ADDRESS}?mode=list&&articleLimit={LENGTH}&article.offset=0", context=context
+    )
+
+    html = result.read()
     soup = BeautifulSoup(html, "html.parser")
     ids = soup.select("table > tbody > tr > td.b-num-box")
     posts = soup.select("table > tbody > tr > td.b-td-left > div > a")
@@ -46,14 +51,14 @@ def test_parse():
     assert len(writers) == 10, f"Check your parser: {writers}"
     for i in range(LENGTH):
         postTitle = posts[i].text.strip()
-        if FILTER_WORDS:
-            FILTERD = False
-            for filter in FILTER_WORDS:
-                if filter in postTitle:
-                    FILTERD = True
-                    break
-            if not FILTERD:
-                continue
+        # if FILTER_WORDS:
+        #     FILTERD = False
+        #     for filter in FILTER_WORDS:
+        #         if filter in postTitle:
+        #             FILTERD = True
+        #             break
+        #     if not FILTERD:
+        #         continue
 
         postId = ids[i].text.strip()
         postLink = posts[i].get("href")
-- 
GitLab