% Journal article record (English metadata plus Persian *_fa companion fields).
% The *_fa, eissn, abstract and keywords fields are non-standard and are ignored by standard styles.
% NOTE(review): the author surname "Gopdarzi" may be a misspelling of "Goodarzi" -- confirm against the publisher record before citing.
@article{gopdarzi2017evaluation,
  author      = {Gopdarzi, Atousa and Janatbabaei, Sajad},
  title       = {Evaluation of Three Data Mining Algorithms (Decision Tree, Naive {Bayes}, Logistic Regression) in Auto Insurance Fraud Detection},
  journal     = {Insurance Research},
  volume      = {1},
  number      = {2},
  pages       = {61--80},
  year        = {2017},
  publisher   = {Allameh Tabataba’i University},
  issn        = {2538-6204},
  eissn       = {2538-2497},
  doi         = {10.22054/irisk.2017.9349},
  abstract    = {From an economic standpoint, financial fraud in the insurance industry is becoming an increasingly serious problem. Nowadays, data mining techniques are commonly used to develop detection models and fraud patterns. Data mining plays an important role in financial fraud detection, helping to uncover hidden facts from among very large quantities of data. Insurance companies can discover hidden patterns in the data and, with the use of well-designed models, make efforts to reduce fraud, manage risks and fulfill their commitments. In this paper, current techniques for detecting auto insurance fraud (logistic regression, decision tree, Naive Bayes) are used. Data mining techniques are typically used to develop models that can spot patterns in fraudulent claims. The techniques introduced are applied to real data in order to also offer predictions. Seven explanatory variables have been examined in the three models. These variables are: policy age, number of claims, delay, age, gender, police sketches, amount of loss. After obtaining the necessary permits, the data required was collected from one of Iran's largest insurance companies. The data was then divided into two sections, one to develop models and the other for classification. Research findings showed that the logistic regression model demonstrated greater accuracy compared to the other two models in predicting total claims (fraudulent and non-fraudulent).},
  keywords    = {Data mining,Fraud,Auto insurance,Decision tree,Naive Bayes,Logistic regression},
  title_fa    = {ارزیابی الگوریتم‌های درخت تصمیم، بیز ساده و رگرسیون لجستیک در کشف تقلبات بیمه اتومبیل},
  abstract_fa = {  از منظر علوم اقتصادی و با مراجعه به آمار و اطلاعات، تخلفات مالی در صنعت بیمه به‌صورت فزاینده‌ای در حال تبدیل‌شدن به یک مسئله جدی و قابل تأمل است. یکی از روش‌های مناسب جهت ارزیابی و مدل‌سازی تخلفات و تقلبات، تکنیک‌های داده‌کاوی است که می‌تواند نقش مهمی در کشف و پیش‌بینی تقلبات مالی ایفا نماید. این شیوه برای آشکار نمودن حقایق پنهان در ورای حجم انبوهی از داده‌ها بکار گرفته می‌شود. شرکت‌های بیمه می‌توانند الگوی پنهان در داده‌ها را کشف کرده و با مدل‌سازی و طراحی الگوهای مناسب اقدامات جدی در راستای کاهش تقلبات، مدیریت ریسک‌ها و ایفای تعهدات به عمل‌آورند. در این مقاله، تکنیک‌های رایج جهت کشف تقلب در بیمه‌های اتومبیل (شامل رگرسیون لجستیک، درخت تصمیم و دسته بندی بیز ساده) مورداستفاده قرار می‌گیرد. ابزارهای داده‌کاوی به‌طور معمول با هدف تعمیم مدل‌های کشف ادعاهای تقلبی و ارائه پیش‌بینی به کار گرفته می‌شوند. متغیرهای توضیحی که در سه مدل این مطالعه مورداستفاده قرار می‌گیرند، عبارت‌اند از: سابقه بیمه‌ای، تعداد ادعاهای خسارت، تأخیر در اعلان خسارت، سن، جنس، کروکی و مبلغ خسارت (شکوری 1390) داده‌های موردنیاز از یکی از شرکت‌های بزرگ بیمه، پس از اخذ مجوزهای لازم، جمع‌آوری شده است. داده‌ها به دو بخش تقسیم شده است. از بخش نخست برای ساخت مدل و از بخش دوم برای دسته‌بندی استفاده شده است. یافته‌های این مطالعه مؤید آن است که مدل رگرسیون لجستیک دقت بیشتری برای پیش‌بینی کل ادعاها(تقلبی و غیر تقلبی) نسبت به دو مدل دیگر، یعنی درخت تصمیم و روش بیز داشته است.},
  keywords_fa = {داده‌کاوی,تقلب,بیمه اتومبیل,درخت تصمیم,بیز ساده,رگرسیون لجستیک},
  url         = {https://irisk.atu.ac.ir/article_9349.html},
  eprint      = {https://irisk.atu.ac.ir/article_9349_e9de698778fa6912b97eb6004eeb665c.pdf}
}