염산하

@[email protected]

Hollo에 코드도 잘 들어가나? (길이 제한은 있겠지만...)

// Name: OCR
// Description: Read an image from the clipboard and recognize its text using OpenAI

import "@johnlindquist/kit";
import OpenAI from 'openai';

// OCR script: takes the image currently on the clipboard, sends it to the
// OpenAI Responses API, and replaces the clipboard contents with the text
// the model extracted. Every outcome (success, no image, no text, API error)
// is reported through a desktop notification.
const clipboardImage = await clipboard.readImage();

if (clipboardImage.byteLength) {
    const apiKey = await env("OPENAI_API_KEY");
    const openai = new OpenAI({ apiKey });

    console.log("OCR started");

    try {
        // Assumes readImage() resolves to a Node Buffer, whose toString is
        // synchronous, so no await is needed here.
        // NOTE(review): the data URI hard-codes image/png — confirm that the
        // clipboard buffer is always PNG-encoded.
        const imageBase64 = clipboardImage.toString('base64');
        const dataUri = `data:image/png;base64,${imageBase64}`;

        console.log("base64 done");
        console.log("calling openai...");

        const response = await openai.responses.create({
            model: 'gpt-4.1-mini',
            input: [
                {
                    role: 'system',
                    content: 'You are a OCR assistant that extracts text from images',
                },
                {
                    role: 'user',
                    content: [
                        {
                            type: 'input_text',
                            text: 'Extract the text from the image',
                        },
                        {
                            type: 'input_image',
                            image_url: dataUri,
                            detail: 'high',
                        },
                    ],
                },
            ],
        });
        const outputText = response.output_text;
        console.log("openai done");

        if (outputText) {
            console.log(`writing to clipboard... ${outputText}`);

            await clipboard.writeText(outputText);

            notify({
                title: "OCR finished",
                message: `Copied text to your clipboard`,
            });
        } else {
            // Do not overwrite the clipboard image with an empty string when
            // the model returned no text.
            notify({
                title: "OCR failed",
                message: `No text was recognized in the image`,
            });
        }
    } catch (error) {
        // Without this, a network/auth/quota failure would end the script
        // with an unhandled rejection and no feedback to the user.
        console.error("Error calling OpenAI:", error);
        notify({
            title: "OCR failed",
            message: `Error: ${error?.message || 'Unknown error'}`,
        });
    }
} else {
    notify({
        title: "OCR failed",
        message: `No image found in clipboard`,
    });
}
염산하

@[email protected] · Reply to 염산하's post

좋아! 호환형 completions 로 바꾸고, gemini 2.5 Flash preview 로 바꿔서 공짜로 하자

// Name: OCR
// Description: Read an image from the clipboard and recognize its text using Gemini (OpenAI-compatible API)

import "@johnlindquist/kit";
import OpenAI from 'openai';

// OCR script: takes the image currently on the clipboard, sends it to Gemini
// through Google's OpenAI-compatible chat-completions endpoint, and replaces
// the clipboard contents with the text the model extracted. Every outcome
// (success, no image, no text, API error) is reported through a desktop
// notification.
const clipboardImage = await clipboard.readImage();

if (clipboardImage.byteLength) {
    // To switch back to OpenAI: read OPENAI_API_KEY instead, drop `baseURL`,
    // and set the model to 'gpt-4.1-mini'.
    const apiKey = await env("GEMINI_AI_STUDIO_API_KEY");
    const openai = new OpenAI({
        apiKey,
        // OpenAI-compatible endpoint that lets this client talk to Gemini.
        baseURL: "https://generativelanguage.googleapis.com/v1beta/openai/",
    });

    console.log("OCR started");

    try {
        // Assumes readImage() resolves to a Node Buffer, whose toString is
        // synchronous, so no await is needed here.
        // NOTE(review): the data URI hard-codes image/png — confirm that the
        // clipboard buffer is always PNG-encoded.
        const imageBase64 = clipboardImage.toString('base64');
        const dataUri = `data:image/png;base64,${imageBase64}`;

        console.log("base64 done");
        console.log("calling openai...");

        const response = await openai.chat.completions.create({
            model: 'gemini-2.5-flash-preview-04-17', // free of charge at the time of writing
            messages: [
                {
                    role: 'system',
                    content: 'You are a OCR assistant that extracts text from images',
                },
                {
                    role: 'user',
                    content: [
                        {
                            type: 'text',
                            text: 'Extract the text from the image',
                        },
                        {
                            type: 'image_url',
                            image_url: { url: dataUri },
                        },
                    ],
                },
            ],
        });
        // The response may carry no choices or a null content; guard so the
        // user gets a clear message rather than a TypeError or a clipboard
        // overwritten with nothing.
        const outputText = response.choices?.[0]?.message?.content;
        console.log("openai done");

        if (outputText) {
            console.log(`writing to clipboard... ${outputText}`);

            await clipboard.writeText(outputText);

            notify({
                title: "OCR finished",
                message: `Copied text to your clipboard`,
            });
        } else {
            notify({
                title: "OCR failed",
                message: `No text was recognized in the image`,
            });
        }
    } catch (error) {
        console.error("Error calling Google compatibility endpoint:", error);
        notify({
            title: "OCR failed",
            message: `Error: ${error?.message || 'Unknown error'}`,
        });
    }
} else {
    notify({
        title: "OCR failed",
        message: `No image found in clipboard`,
    });
}