Did you find a solution? I'm having the same problem with deepseek-r1 and falcon3:10b, and it always seems to happen on the same questions: it worked on questions 1-6 and 8, but gave no response on questions 7, 9 and 10. Here is the script I'm using:

import ollama
import time
import traceback

models = [ 
    "falcon3:10b"
]

questions = [
    r"Solve the PDE: \(u_{tt}=c^{2}u_{xx}\),with initial conditions \(u(x,0)=\sin (x)\) ,\(u_{t}(x,0)=0\).",
    "Compute the Lebesgue integral of the Dirichlet function on [0,1].",
    "Design a nondeterministic Turing machine that decides the language L={0^n 1^n∣n≥0}",
    "Prove that the halting problem is undecidable without referencing diagonalization.",
    "Optimize the Fibonacci sequence calculation to O(1) space complexity.",
    "Derive the Euler-Lagrange equations for a pendulum with air resistance proportional to velocity.",
    "Explain the Born rule in quantum mechanics and its interpretation.",
    "Explain the Black-Scholes PDE and its assumptions. Derive the closed-form solution for a European call option.",
    "Describe the Diffie-Hellman key exchange protocol and its vulnerability to quantum attacks.",
    "Model the spread of a virus using a SIR model with time-varying transmission rates.",
    "Write a Python function to compute the nth prime number, optimized for n > 10^6",
    "If the roots of lx2+2mx+n=0 are real & distinct, then the roots of (l+n)(lx2+2mx+n)=2(ln−m2)(x2+1) will be:",
    r"show that (without induction) $$\frac{1}{\displaystyle\prod_{i=0}^{i=n}A_{i}}=n!\int\limits_{|\Delta^{n}|}\frac{\mathrm d\sigma}{\left( \displaystyle \sum\limits_i s_i A_i \right)^n}$$ where $\mathrm d\sigma$ is the Lebesgue measure on the standard $n$-simplex $|\Delta^{n}|$, and $s_i$ are dummy integration variables."
]

log_file = r"C:\Users\ubuntu\Desktop\math model test results.txt"
max_retries = 3
retry_delay = 10  # seconds
wait_between_prompts = 30  # seconds

def log(message):
    print(message)
    with open(log_file, "a", encoding="utf-8") as f:
        f.write(message + "\n")

def get_resume_info(log_file_path, models):
    model_data = {}  # {model: {'last_attempted': int, 'last_completed': int}}
    current_model = None
    last_model = None

    try:
        with open(log_file_path, 'r', encoding='utf-8') as f:
            for line in f:
                line = line.strip()
                if line.startswith('=== Testing Model: '):
                    model_name = line[len('=== Testing Model: '):].split(' ===', 1)[0].strip()
                    if model_name in models:
                        current_model = model_name
                        if current_model not in model_data:
                            model_data[current_model] = {'last_attempted': 0, 'last_completed': 0}
                        last_model = current_model
                elif line.startswith('Question '):
                    if current_model:
                        q_num = int(line.split()[1].split(':')[0])
                        model_data[current_model]['last_attempted'] = q_num
                elif line.startswith('Response from '):
                    if current_model and model_data[current_model]['last_attempted'] > model_data[current_model]['last_completed']:
                        model_data[current_model]['last_completed'] = model_data[current_model]['last_attempted']
    except FileNotFoundError:
        pass

    if last_model:
        data = model_data.get(last_model, {'last_attempted': 0, 'last_completed': 0})
        if data['last_attempted'] > data['last_completed']:
            # Resume at the incompletely logged question
            return last_model, data['last_attempted']
        else:
            # Resume at next question
            return last_model, data['last_completed'] + 1
    else:
        return None, 1  # Start fresh

# Determine where to resume
last_model, start_question = get_resume_info(log_file, models)

start_model_index = 0
if last_model:
    try:
        start_model_index = models.index(last_model)
        # Check if we need to move to next model
        if start_question > len(questions):
            start_model_index += 1
            start_question = 1
    except ValueError:
        pass  # Model not found, start from beginning

# Clear log only if starting fresh
if last_model is None:
    open(log_file, "w").close()

for model_idx in range(start_model_index, len(models)):
    model = models[model_idx]
    log(f"\n=== Testing Model: {model} ===\n")
    
    # Determine starting question for this model
    if model == last_model:
        q_start = start_question
    else:
        q_start = 1
    
    for q_idx in range(q_start - 1, len(questions)):
        question = questions[q_idx]
        i = q_idx + 1  # 1-based index
        
        # Optionally, add an explicit end-of-answer cue to the question
        # question += "\n\nPlease ensure that your answer is complete and end with '#END'."
        
        log(f"Waiting {wait_between_prompts} seconds before next prompt...\n")
        time.sleep(wait_between_prompts)
        
        log(f"Question {i}: {question}")
        attempt = 0
        success = False
        while attempt < max_retries and not success:
            try:
                start_time = time.time()
                response = ollama.chat(
                    model=model,
                    messages=[{"role": "user", "content": question}]
                )
                time_taken = time.time() - start_time
                
                # Log raw response for debugging
                log(f"Raw response object (string): {str(response)}")

                
                content = response.get('message', {}).get('content', '').strip()
                
                # Check if the response seems suspiciously short
                if len(content) < 50:
                    log(f"⚠️ Warning: Response length ({len(content)}) seems too short. Possible incomplete output.")
                
                log(f"\nResponse from {model}:\n{content}")
                log(f"Time taken: {time_taken:.2f} sec\n" + "-" * 60)
                success = True
            except Exception as e:
                attempt += 1
                error_info = f"Attempt {attempt} failed for model {model} on question {i}: {e}"
                log(error_info)
                if attempt < max_retries:
                    log(f"Retrying in {retry_delay} seconds...\n")
                    time.sleep(retry_delay)
                else:
                    log(f"Failed after {max_retries} attempts.\n")
                    log(traceback.format_exc())
                    log("-" * 60)

input('Press Enter to exit')
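
In case it helps with debugging: one thing worth trying is to stream the reply for one of the failing questions and look at the final chunk's metadata, to tell apart 'the model generated nothing' from 'the text got lost on the way back'. This is just a rough diagnostic sketch, assuming the ollama Python client's stream=True mode and the done_reason / eval_count fields that recent Ollama versions report on the last chunk:

import ollama

model = "falcon3:10b"
# Question 7, one of the prompts that comes back empty in the script above
question = "Explain the Born rule in quantum mechanics and its interpretation."

# Stream the reply so any partial output is visible even if the final message is empty.
stream = ollama.chat(
    model=model,
    messages=[{"role": "user", "content": question}],
    stream=True,
)

pieces = []
last_chunk = None
for chunk in stream:
    piece = chunk.get('message', {}).get('content', '') or ''
    pieces.append(piece)
    print(piece, end="", flush=True)
    last_chunk = chunk

print()
print(f"Characters streamed: {len(''.join(pieces))}")
if last_chunk is not None:
    # The final chunk should say why generation stopped and how many tokens were produced.
    print(f"done_reason: {last_chunk.get('done_reason')}")
    print(f"eval_count: {last_chunk.get('eval_count')}")

If tokens do stream but the non-streaming call still returns an empty message, the problem is in how the response is collected; if nothing streams at all and eval_count is near zero, the model itself is bailing out on those prompts.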