import nltk
nltk.download('punkt')
import math
from collections import Counter
def tokenize(sentence):
    """Split *sentence* into word tokens via NLTK's Punkt-based tokenizer."""
    tokens = nltk.word_tokenize(sentence)
    return tokens
def calculate_ngram(candidate, n):
    """Return every contiguous n-gram of *candidate* as a tuple, in order.

    A sequence shorter than *n* yields an empty list.
    """
    return [
        tuple(candidate[start:start + n])
        for start in range(len(candidate) - n + 1)
    ]
def calculate_precision(candidate, references, n):
    """Compute the clipped (modified) n-gram precision of *candidate*.

    Each candidate n-gram's count is clipped to the maximum number of
    times it appears in any single reference; the precision is the sum
    of clipped counts over the total candidate n-gram count (with a
    floor of 1 on the denominator to avoid division by zero).
    """
    cand_counts = Counter(
        tuple(candidate[i:i + n]) for i in range(len(candidate) - n + 1)
    )
    ref_counts = [
        Counter(tuple(ref[i:i + n]) for i in range(len(ref) - n + 1))
        for ref in references
    ]
    clipped = {
        gram: min(count, max(rc[gram] for rc in ref_counts))
        for gram, count in cand_counts.items()
    }
    denominator = max(1, sum(cand_counts.values()))
    return sum(clipped.values()) / denominator
def calculate_bleu(candidate, references, weights):
    """Compute the BLEU score of *candidate* against *references*.

    Parameters
    ----------
    candidate : str
        Hypothesis sentence.
    references : list[str]
        One or more reference sentences.
    weights : list[float]
        Per-order weights (should sum to 1); ``len(weights)`` sets the
        maximum n-gram order.

    Returns
    -------
    float
        BLEU score in [0, 1].
    """
    candidate_tokens = tokenize(candidate)
    reference_tokens = [tokenize(ref) for ref in references]

    precisions = [
        calculate_precision(candidate_tokens, reference_tokens, n)
        for n in range(1, len(weights) + 1)
    ]
    # Smooth zero/NaN precisions so the log in the geometric mean is defined.
    precisions = [p if not math.isnan(p) and p != 0.0 else 1e-10 for p in precisions]

    # Weighted geometric mean: exp(sum w_n * log p_n).  The weights already
    # sum to 1, so dividing the log-sum by len(weights) — as the previous
    # version did — raised each precision to the wrong power.
    geo_mean = math.exp(sum(w * math.log(p) for w, p in zip(weights, precisions)))

    # Standard BLEU brevity penalty (Papineni et al., 2002):
    # r = reference length closest to the candidate length c (ties broken
    # toward the shorter reference); BP = 1 if c > r else exp(1 - r/c).
    # The previous linear min(1, c / min_ref_len) under-penalized short
    # candidates and compared against the wrong reference.
    c = len(candidate_tokens)
    r = min(
        (len(ref) for ref in reference_tokens),
        key=lambda ref_len: (abs(ref_len - c), ref_len),
    )
    if c == 0:
        brevity_penalty = 0.0
    elif c > r:
        brevity_penalty = 1.0
    else:
        brevity_penalty = math.exp(1 - r / c)

    return brevity_penalty * geo_mean
# Example usage — guarded so importing this module does not run the demo.
if __name__ == "__main__":
    candidate = "The cat is on the mat"
    references = ["There is a cat on the mat", "The mat has a cat"]
    weights = [0.25, 0.25, 0.25, 0.25]
    bleu_score = calculate_bleu(candidate, references, weights)
    print("BLEU score:", bleu_score)