| | import re |
| | import emoji |
| | from PIL import Image |
| | from backend import config |
| | from backend.utils import get_roi, clean_text, are_strings_similar, blur_image, is_blank, is_english, is_valid_english, destroy_text_roi |
| | from backend.model_handler import model_handler |
| |
|
def is_unreadable_tagline(htag, tag):
    """Report whether two readings of the same tagline disagree.

    Each string is normalised with ``clean_text`` and the two results are
    compared with ``are_strings_similar``.

    Args:
        htag: First reading (in ``tagline()`` this is the text read from the
            blurred crop).
        tag: Second reading (in ``tagline()`` this is the text read from the
            unblurred crop).

    Returns:
        bool: ``True`` when the cleaned strings are *not* similar.
    """
    # Clean in the same order as the arguments: htag first, then tag.
    cleaned_pair = [clean_text(text) for text in (htag, tag)]
    return not are_strings_similar(*cleaned_pair)
| |
|
def is_hyperlink_tagline(tag):
    """Tell whether the tagline text contains a URL-like fragment.

    Args:
        tag: Tagline text to inspect.

    Returns:
        bool: ``True`` when ``'www'``, ``'.com'`` or ``'http'`` occurs
        anywhere in ``tag`` (plain, case-sensitive substring test).
    """
    for marker in ('www', '.com', 'http'):
        if marker in tag:
            return True
    return False
| |
|
def is_price_tagline(tag):
    """Decide whether the tagline advertises a concrete price.

    A tagline counts as a price tag when it contains a currency marker
    (``₹``, ``rs``, ``$`` or ``र``) followed by digits, unless the amount is
    expressed as a large magnitude, in which case it is not treated as a
    price:

    * the text contains one of the magnitude words ``crore``, ``thousand``,
      ``million``, ``billion`` or ``trillion``; or
    * a currency amount is followed by a magnitude suffix
      (``lac``/``lacs``/``lakh``/``lakhs``/``cr``/``crs``/``k``).

    Matching is case-sensitive; ``tagline()`` passes lower-cased text.

    Args:
        tag: Tagline text to inspect.

    Returns:
        bool: ``True`` when a price is present and no magnitude exclusion
        applies.
    """
    magnitude_words = ("crore", "thousand", "million", "billion", "trillion")

    # Currency marker, amount, magnitude suffix.
    #  * ``(?:[.,]\d+)*`` lets decimal / grouped amounts such as "1.5" or
    #    "1,50" reach the suffix, so "₹1.5 lakh" is excluded like "₹1 lakh".
    #  * The trailing ``\b`` stops ordinary words that merely start with a
    #    suffix ("₹500 kurta", "rs 99 cream") from suppressing a real price.
    magnitude_pattern = (
        r'(?:₹|\brs|\$)\.?\s?\d+(?:[.,]\d+)*\s*(?:lacs?|lakhs?|crs?|k)\b'
    )
    price_pattern = r'(₹\s?\d+)|(\brs\.?\s?\d+)|(\$\s?\d+)|(र\d+)'

    if any(word in tag for word in magnitude_words):
        return False
    if re.search(magnitude_pattern, tag):
        return False
    return re.search(price_pattern, tag) is not None
| |
|
def is_multiple_emoji(emoji_text):
    """Interpret a textual emoji-count answer as "more than one emoji".

    Only the last whitespace-separated token of ``emoji_text`` is examined;
    the answer means "multiple" unless that token is exactly ``'0'`` or
    ``'1'``.

    Args:
        emoji_text: Response text whose final token is expected to be the
            count (in ``tagline()`` this is the model reply to
            ``config.PEMO``).

    Returns:
        bool: ``True`` when the last token is neither ``'0'`` nor ``'1'``;
        ``False`` for those tokens and for an empty/whitespace-only reply.
    """
    tokens = emoji_text.split()
    if not tokens:
        # An empty reply carries no count; indexing tokens[-1] would raise
        # IndexError, and with nothing to go on we do not raise the flag.
        return False
    return tokens[-1] not in ('0', '1')
| |
|
def is_incomplete_tagline(tag, is_eng):
    """Check whether the tagline looks cut off.

    Emojis are removed and surrounding whitespace is stripped before the
    ending is inspected.

    Args:
        tag: Tagline text.
        is_eng: Whether the tagline was classified as English.

    Returns:
        bool: ``True`` when the text ends with ``'..'``/``'...'``, or when
        it is not English and ends with a single ``'.'``; ``False``
        otherwise.
    """
    stripped = emoji.replace_emoji(tag, '').strip()
    if stripped.endswith(('...', '..')):
        return True
    # A lone trailing period only counts against non-English taglines.
    return (not is_eng) and stripped.endswith('.')
| |
|
def tagline(image_path):
    """Run every tagline check on an image and return the flag dictionary.

    The region given by ``config.TAG`` is cropped, read with EasyOCR both
    as-is and after ``blur_image(..., 0.3)``. If either reading is blank the
    tagline is flagged as empty/illegible and the function returns at once.
    Otherwise the tagline text is taken from ``model_handler.intern`` for
    English text (or from the EasyOCR reading for non-English text) and fed
    to the individual checks.

    Args:
        image_path: Path of the image to analyse.

    Returns:
        dict: Flag name -> ``0``/``1`` for "Empty/Illegible/Black Tagline",
        "Multiple Taglines", "Incomplete Tagline", "Hyperlink", "Price Tag"
        and "Excessive Emojis".
    """
    flags = {
        "Empty/Illegible/Black Tagline": 0,
        "Multiple Taglines": 0,
        "Incomplete Tagline": 0,
        "Hyperlink": 0,
        "Price Tag": 0,
        "Excessive Emojis": 0
    }

    crop = get_roi(image_path, *config.TAG)
    blurred_crop = blur_image(crop, 0.3)
    ocr_text = model_handler.easyocr_ocr(crop).lower().strip()
    blurred_ocr_text = model_handler.easyocr_ocr(blurred_crop).lower().strip()

    # Nothing readable in either version of the crop: flag and stop early.
    if is_blank(ocr_text) or is_blank(blurred_ocr_text):
        flags["Empty/Illegible/Black Tagline"] = 1
        return flags

    is_eng = is_english(ocr_text)
    if is_eng:
        # English text is re-read through model_handler.intern.
        raw_tag = model_handler.intern(crop, config.PTAG, 25).strip()
        tag = raw_tag.lower()
    else:
        tag = ocr_text

    # Compare the reading of the blurred crop against the chosen tagline text.
    htag = model_handler.intern(blurred_crop, config.PTAG, 25).lower().strip()
    if is_unreadable_tagline(htag, tag):
        flags["Empty/Illegible/Black Tagline"] = 1

    flags["Incomplete Tagline"] = int(is_incomplete_tagline(tag, is_eng))
    flags["Hyperlink"] = int(is_hyperlink_tagline(tag))
    flags["Price Tag"] = int(is_price_tagline(tag))

    # Any text found in the config.DTAG region counts as an extra tagline.
    second_crop = get_roi(image_path, *config.DTAG)
    second_text = model_handler.easyocr_ocr(second_crop).strip()
    flags["Multiple Taglines"] = int(not is_blank(second_text))

    emoji_reply = model_handler.intern(crop, config.PEMO, 100)
    flags["Excessive Emojis"] = int(is_multiple_emoji(emoji_reply))

    return flags
| |
|
def cta(image_path):
    """Check the call-to-action text of an image.

    The region given by ``config.CTA`` is cropped and read with
    ``model_handler.intern``. The CTA is reported as bad when the text

    * contains a period,
    * contains an emoji character,
    * is English and its cleaned form is at most 2 characters long, or
    * has a cleaned form longer than 15 characters.

    Args:
        image_path: Path of the image to analyse.

    Returns:
        dict: ``{"Bad CTA": 1}`` when any rule above fires, otherwise
        ``{"Bad CTA": 0}``.
    """
    image = get_roi(image_path, *config.CTA)
    cta_text = model_handler.intern(image, config.PTAG, 5).strip()

    # A single '.' test also covers '..' and '...'.
    if '.' in cta_text:
        return {"Bad CTA": 1}

    if any(emoji.is_emoji(char) for char in cta_text):
        return {"Bad CTA": 1}

    clean_cta_text = clean_text(cta_text)

    # The language check is only consulted when the length rule could fire.
    if len(clean_cta_text) <= 2 and is_english(cta_text):
        return {"Bad CTA": 1}

    if len(clean_cta_text) > 15:
        return {"Bad CTA": 1}

    return {"Bad CTA": 0}
| |
|
def tnc(image_path):
    """Detect text in the terms-and-conditions region of an image.

    The region given by ``config.TNC`` is cropped, read with EasyOCR and
    passed through ``clean_text``.

    Args:
        image_path: Path of the image to analyse.

    Returns:
        dict: ``{"Terms & Conditions": 0}`` when the cleaned text is blank,
        otherwise ``{"Terms & Conditions": 1}``.
    """
    region = get_roi(image_path, *config.TNC)
    cleaned = clean_text(model_handler.easyocr_ocr(region))
    if is_blank(cleaned):
        return {"Terms & Conditions": 0}
    return {"Terms & Conditions": 1}
| |
|
def tooMuchText(image_path):
    """Flag images whose remaining area carries too much text.

    The image is loaded as RGB, three fixed regions are passed through
    ``destroy_text_roi``, and the result is read with EasyOCR. The flag is
    raised when the lower-cased, stripped OCR text is longer than 55
    characters.

    Args:
        image_path: Path of the image to analyse.

    Returns:
        dict: ``{"Too Much Text": 1}`` when more than 55 characters are
        read, otherwise ``{"Too Much Text": 0}``.
    """
    # NOTE(review): the tuples look like fractional box coordinates consumed
    # by destroy_text_roi - confirm the exact order against that helper.
    masked_regions = (
        (0.04, 0.625, 1.0, 0.677),  # DRIB
        (0, 0, 1.0, 0.25),          # DUP
        (0, 0.85, 1.0, 1),          # DBEL
    )

    # Image.open is lazy and keeps the file open; convert() yields an
    # independent in-memory copy, so the handle can be released right away.
    with Image.open(image_path) as source:
        image = source.convert('RGB')

    for region in masked_regions:
        image = destroy_text_roi(image, *region)

    remaining_text = model_handler.easyocr_ocr(image).lower().strip()
    return {"Too Much Text": 1 if len(remaining_text) > 55 else 0}
| |
|