import ast

import joblib
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder

# Questionnaire ids "1" through "13", as strings, in the fixed order used to
# lay out a feature vector.
FEATURE_ORDER = list(map(str, range(1, 14)))

# Module-level cache for the fitted classifier and its label encoder; both
# stay None until load_model() assigns them.
model = None
label_encoder = None

def train_model_from_db(results):
    """Train a random-forest classifier from stored results and persist it.

    Each item of *results* is indexed with ``"answers"`` (a string holding a
    Python literal, parsed into one feature row) and ``"diagnosis"`` (the
    label for that row). Labels are encoded with a ``LabelEncoder`` before
    fitting a ``RandomForestClassifier`` created with default parameters.

    The fitted classifier is written to ``ml_model.pkl`` and the fitted
    encoder to ``label_encoder.pkl`` (paths relative to the current working
    directory). The module-level ``model`` / ``label_encoder`` globals are
    not modified here; call ``load_model()`` to refresh them.

    Args:
        results: Iterable of rows supporting ``row["answers"]`` and
            ``row["diagnosis"]``. It is consumed in a single pass.

    Returns:
        A ``(classifier, label_encoder)`` tuple of the fitted objects.

    Raises:
        ValueError: If *results* yields no rows, or an ``"answers"`` value
            is not a valid Python literal.
        SyntaxError: If an ``"answers"`` value cannot be parsed at all.
    """
    X = []
    y = []
    for row in results:
        # SECURITY: the original used eval() here, which executes arbitrary
        # code embedded in the stored string. literal_eval only accepts
        # plain literals (lists, numbers, booleans, strings, ...).
        X.append(ast.literal_eval(row["answers"]))
        y.append(row["diagnosis"])

    if not X:
        # Fail early with a clear message instead of deep inside sklearn,
        # and before any model file is overwritten.
        raise ValueError("Cannot train model: no training results provided")

    le = LabelEncoder()
    y_encoded = le.fit_transform(y)

    clf = RandomForestClassifier()
    clf.fit(X, y_encoded)

    joblib.dump(clf, "ml_model.pkl")
    joblib.dump(le, "label_encoder.pkl")
    return clf, le

def load_model() -> None:
    """Load the persisted classifier and label encoder into module globals.

    Reads ``ml_model.pkl`` into ``model`` and then ``label_encoder.pkl`` into
    ``label_encoder`` (paths relative to the current working directory).

    Nothing is caught here, so any exception raised by ``joblib.load``
    propagates to the caller. Because the two loads run in sequence, a
    failure on the second file leaves ``model`` already assigned while
    ``label_encoder`` keeps its previous value.
    """
    global model, label_encoder
    model = joblib.load("ml_model.pkl")
    label_encoder = joblib.load("label_encoder.pkl")

def answers_to_features(answers):
    """Convert answer records into a fixed-order list of 0.0/1.0 features.

    Each item of *answers* must expose ``questionnaire_id`` and ``answer``
    attributes. Records are keyed by ``str(questionnaire_id)``; when several
    records share an id, the last one wins. The result has one entry per id
    in ``FEATURE_ORDER``: ``1.0`` if that id's answer is truthy, ``0.0`` if
    it is falsy or the id has no record.
    """
    by_question = {}
    for record in answers:
        key = str(record.questionnaire_id)
        by_question[key] = record.answer

    features = []
    for question_id in FEATURE_ORDER:
        if by_question.get(question_id, False):
            features.append(1.0)
        else:
            features.append(0.0)
    return features

def predict_ml_model(features: list[float]):
    """Predict a decoded label for a single feature vector.

    If either module-level ``model`` or ``label_encoder`` is still ``None``,
    ``load_model()`` is called first. Any exception raised while loading or
    while predicting is printed and the fallback string is returned instead
    of propagating.

    Args:
        features: One feature row; it is wrapped in a list before being
            passed to ``model.predict``.

    Returns:
        The first element of the inverse-transformed prediction, or the
        fallback message string when loading or prediction fails.
    """
    fallback = "Không đủ dữ liệu ML"

    needs_loading = model is None or label_encoder is None
    if needs_loading:
        try:
            load_model()
        except Exception as load_error:
            print("Lỗi khi load mô hình:", load_error)
            return fallback

    try:
        # Resolve the decoder before predicting to keep the original
        # left-to-right evaluation order.
        decode = label_encoder.inverse_transform
        encoded = model.predict([features])
        return decode(encoded)[0]
    except Exception as predict_error:
        print("Lỗi khi dự đoán:", predict_error)
        return fallback

