import requests
import uuid
import time
import json
# image_file = './IR_PROJECT/TEST/IR_all_test.pdf'  # TODO: remove this; the path should be passed to the function as an argument
def ocr_connect(image_file):
    """Upload one file to the OCR API and return the recognized text fields.

    Builds a V2 request message, POSTs it together with the file contents as
    a multipart upload, and collects the ``inferText`` value of every field
    of every image in the JSON response.

    Args:
        image_file: Path of the file to upload; it is opened in binary mode.

    Returns:
        A list of the ``inferText`` values in response order when the API
        answers with HTTP 200 (empty if the response has no ``images`` or no
        ``fields``). ``None`` when the API answers with any other status.

    Raises:
        OSError: If ``image_file`` cannot be opened.
        KeyError: If a returned field has no ``inferText`` key.
    """
    api_url = '----- naver api url -----'
    secret_key = '----- naver secret key ------'

    request_json = {
        'images': [
            {
                # NOTE(review): the format is hard-coded to 'jpg' regardless
                # of the extension of image_file -- confirm that callers only
                # pass JPEG files.
                'format': 'jpg',
                'name': 'demo',
            },
        ],
        'requestId': str(uuid.uuid4()),
        'version': 'V2',
        # Current time in milliseconds since the epoch.
        'timestamp': int(round(time.time() * 1000)),
    }
    payload = {'message': json.dumps(request_json).encode('UTF-8')}
    headers = {'X-OCR-SECRET': secret_key}

    # Keep the file open only for the duration of the upload so the handle
    # is always released, even if the request raises.
    with open(image_file, 'rb') as image_fp:
        response = requests.request(
            "POST",
            api_url,
            headers=headers,
            data=payload,
            files=[('file', image_fp)],
        )

    if response.status_code != 200:
        print(f"API 요청 실패: {response.status_code}")
        print(response.text)
        # Callers receive None on failure, exactly as before.
        return None

    result = response.json()
    keyword_list = []
    if 'images' in result:
        for image_info in result['images']:
            # Images without a 'fields' entry contribute nothing.
            if 'fields' in image_info:
                keyword_list.extend(
                    [field['inferText'] for field in image_info['fields']]
                )
    else:
        print("이미지 정보를 찾을 수 없습니다.")
    print("한장씩 keyword_list 출력완료")
    return keyword_list