Include full contents of all nested repositories
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
272
openclaw/scripts/docs-i18n/doc_mode.go
Normal file
272
openclaw/scripts/docs-i18n/doc_mode.go
Normal file
@@ -0,0 +1,272 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
|
||||
"gopkg.in/yaml.v3"
|
||||
)
|
||||
|
||||
// Tag markers used to delimit the frontmatter and body sections of the
// single tagged document sent to the translator, so both parts can be
// translated in one request and split apart again afterwards.
const (
	frontmatterTagStart = "<frontmatter>"
	frontmatterTagEnd   = "</frontmatter>"
	bodyTagStart        = "<body>"
	bodyTagEnd          = "</body>"
)
|
||||
|
||||
// processFileDoc translates a single markdown document in "doc" mode: the
// whole file (frontmatter template + body) is sent to the translator as one
// tagged document. It returns (true, nil) when the file was skipped because
// the existing translation's stored source hash matches, and (false, nil)
// after a successful write.
func processFileDoc(ctx context.Context, translator *PiTranslator, docsRoot, filePath, srcLang, tgtLang string, overwrite bool) (bool, error) {
	absPath, relPath, err := resolveDocsPath(docsRoot, filePath)
	if err != nil {
		return false, err
	}

	content, err := os.ReadFile(absPath)
	if err != nil {
		return false, err
	}
	currentHash := hashBytes(content)

	// Skip retranslation when the existing output at docs/<lang>/<rel>
	// records the same source hash (see shouldSkipDoc).
	outputPath := filepath.Join(docsRoot, tgtLang, relPath)
	if !overwrite {
		skip, err := shouldSkipDoc(outputPath, currentHash)
		if err != nil {
			return false, err
		}
		if skip {
			return true, nil
		}
	}

	sourceFront, sourceBody := splitFrontMatter(string(content))
	frontData := map[string]any{}
	if strings.TrimSpace(sourceFront) != "" {
		if err := yaml.Unmarshal([]byte(sourceFront), &frontData); err != nil {
			return false, fmt.Errorf("frontmatter parse failed for %s: %w", relPath, err)
		}
	}
	// Translatable frontmatter fields are wrapped in marker pairs so their
	// translated values can be located in the model output afterwards.
	frontTemplate, markers := buildFrontmatterTemplate(frontData)
	taggedInput := formatTaggedDocument(frontTemplate, sourceBody)

	translatedDoc, err := translator.TranslateRaw(ctx, taggedInput, srcLang, tgtLang)
	if err != nil {
		return false, fmt.Errorf("translate failed (%s): %w", relPath, err)
	}

	translatedFront, translatedBody, err := parseTaggedDocument(translatedDoc)
	if err != nil {
		return false, fmt.Errorf("tagged output invalid for %s: %w", relPath, err)
	}
	// Guard against the model dropping the frontmatter section entirely.
	if sourceFront != "" && strings.TrimSpace(translatedFront) == "" {
		return false, fmt.Errorf("translation removed frontmatter for %s", relPath)
	}
	if err := applyFrontmatterTranslations(frontData, markers, translatedFront); err != nil {
		return false, fmt.Errorf("frontmatter translation failed for %s: %w", relPath, err)
	}

	// Re-encode the (now partially translated) frontmatter. encodeFrontMatter
	// also receives the original content — presumably to stamp the source
	// hash consumed by shouldSkipDoc; confirm against its definition.
	updatedFront, err := encodeFrontMatter(frontData, relPath, content)
	if err != nil {
		return false, err
	}

	if err := os.MkdirAll(filepath.Dir(outputPath), 0o755); err != nil {
		return false, err
	}

	output := updatedFront + translatedBody
	return false, os.WriteFile(outputPath, []byte(output), 0o644)
}
|
||||
|
||||
func formatTaggedDocument(frontMatter, body string) string {
|
||||
return fmt.Sprintf("%s\n%s\n%s\n%s\n%s\n%s", frontmatterTagStart, frontMatter, frontmatterTagEnd, bodyTagStart, body, bodyTagEnd)
|
||||
}
|
||||
|
||||
func parseTaggedDocument(text string) (string, string, error) {
|
||||
frontStart := strings.Index(text, frontmatterTagStart)
|
||||
if frontStart == -1 {
|
||||
return "", "", fmt.Errorf("missing %s", frontmatterTagStart)
|
||||
}
|
||||
frontStart += len(frontmatterTagStart)
|
||||
frontEnd := strings.Index(text[frontStart:], frontmatterTagEnd)
|
||||
if frontEnd == -1 {
|
||||
return "", "", fmt.Errorf("missing %s", frontmatterTagEnd)
|
||||
}
|
||||
frontEnd += frontStart
|
||||
|
||||
bodyStart := strings.Index(text[frontEnd:], bodyTagStart)
|
||||
if bodyStart == -1 {
|
||||
return "", "", fmt.Errorf("missing %s", bodyTagStart)
|
||||
}
|
||||
bodyStart += frontEnd + len(bodyTagStart)
|
||||
bodyEnd := strings.Index(text[bodyStart:], bodyTagEnd)
|
||||
if bodyEnd == -1 {
|
||||
return "", "", fmt.Errorf("missing %s", bodyTagEnd)
|
||||
}
|
||||
bodyEnd += bodyStart
|
||||
|
||||
prefix := strings.TrimSpace(text[:frontStart-len(frontmatterTagStart)])
|
||||
suffix := strings.TrimSpace(text[bodyEnd+len(bodyTagEnd):])
|
||||
if prefix != "" || suffix != "" {
|
||||
return "", "", fmt.Errorf("unexpected text outside tagged sections")
|
||||
}
|
||||
|
||||
frontMatter := trimTagNewlines(text[frontStart:frontEnd])
|
||||
body := trimTagNewlines(text[bodyStart:bodyEnd])
|
||||
return frontMatter, body, nil
|
||||
}
|
||||
|
||||
// trimTagNewlines strips the single leading and trailing newline that
// formatTaggedDocument inserts between each tag and its content.
func trimTagNewlines(value string) string {
	return strings.TrimSuffix(strings.TrimPrefix(value, "\n"), "\n")
}
|
||||
|
||||
// frontmatterMarker records one marker pair injected into the frontmatter
// template so the translated value can be located in the model output.
type frontmatterMarker struct {
	Field string // frontmatter key: "summary", "title" or "read_when"
	Index int    // element index for list-valued fields; 0 for scalars
	Start string // opening marker text, e.g. "[[[FM_TITLE_0_START]]]"
	End   string // closing marker text, e.g. "[[[FM_TITLE_0_END]]]"
}
|
||||
|
||||
func buildFrontmatterTemplate(data map[string]any) (string, []frontmatterMarker) {
|
||||
if len(data) == 0 {
|
||||
return "", nil
|
||||
}
|
||||
markers := []frontmatterMarker{}
|
||||
lines := []string{}
|
||||
|
||||
if summary, ok := data["summary"].(string); ok {
|
||||
start, end := markerPair("SUMMARY", 0)
|
||||
markers = append(markers, frontmatterMarker{Field: "summary", Index: 0, Start: start, End: end})
|
||||
lines = append(lines, fmt.Sprintf("summary: %s%s%s", start, summary, end))
|
||||
}
|
||||
|
||||
if title, ok := data["title"].(string); ok {
|
||||
start, end := markerPair("TITLE", 0)
|
||||
markers = append(markers, frontmatterMarker{Field: "title", Index: 0, Start: start, End: end})
|
||||
lines = append(lines, fmt.Sprintf("title: %s%s%s", start, title, end))
|
||||
}
|
||||
|
||||
if readWhen, ok := data["read_when"].([]any); ok {
|
||||
lines = append(lines, "read_when:")
|
||||
for idx, item := range readWhen {
|
||||
textValue, ok := item.(string)
|
||||
if !ok {
|
||||
lines = append(lines, fmt.Sprintf(" - %v", item))
|
||||
continue
|
||||
}
|
||||
start, end := markerPair("READ_WHEN", idx)
|
||||
markers = append(markers, frontmatterMarker{Field: "read_when", Index: idx, Start: start, End: end})
|
||||
lines = append(lines, fmt.Sprintf(" - %s%s%s", start, textValue, end))
|
||||
}
|
||||
}
|
||||
|
||||
return strings.Join(lines, "\n"), markers
|
||||
}
|
||||
|
||||
// markerPair builds the opening and closing marker strings for one
// translatable frontmatter value, keyed by field name and element index.
func markerPair(field string, index int) (string, string) {
	base := fmt.Sprintf("[[[FM_%s_%d_", field, index)
	return base + "START]]]", base + "END]]]"
}
|
||||
|
||||
func applyFrontmatterTranslations(data map[string]any, markers []frontmatterMarker, translatedFront string) error {
|
||||
if len(markers) == 0 {
|
||||
return nil
|
||||
}
|
||||
for _, marker := range markers {
|
||||
value, err := extractMarkerValue(translatedFront, marker.Start, marker.End)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
value = strings.TrimSpace(value)
|
||||
switch marker.Field {
|
||||
case "summary":
|
||||
data["summary"] = value
|
||||
case "title":
|
||||
data["title"] = value
|
||||
case "read_when":
|
||||
data["read_when"] = setReadWhenValue(data["read_when"], marker.Index, value)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// extractMarkerValue returns the text between the first occurrence of the
// start marker and the next occurrence of the end marker after it.
func extractMarkerValue(text, start, end string) (string, error) {
	_, after, found := strings.Cut(text, start)
	if !found {
		return "", fmt.Errorf("missing marker %s", start)
	}
	value, _, found := strings.Cut(after, end)
	if !found {
		return "", fmt.Errorf("missing marker %s", end)
	}
	return value, nil
}
|
||||
|
||||
// setReadWhenValue stores value at position index of the read_when list,
// growing the list with empty strings as needed, and starting a fresh list
// when the existing value is not a []any.
func setReadWhenValue(existing any, index int, value string) []any {
	items, _ := existing.([]any)
	if items == nil {
		items = []any{}
	}
	for len(items) <= index {
		items = append(items, "")
	}
	items[index] = value
	return items
}
|
||||
|
||||
func shouldSkipDoc(outputPath string, sourceHash string) (bool, error) {
|
||||
data, err := os.ReadFile(outputPath)
|
||||
if err != nil {
|
||||
if os.IsNotExist(err) {
|
||||
return false, nil
|
||||
}
|
||||
return false, err
|
||||
}
|
||||
frontMatter, _ := splitFrontMatter(string(data))
|
||||
if frontMatter == "" {
|
||||
return false, nil
|
||||
}
|
||||
frontData := map[string]any{}
|
||||
if err := yaml.Unmarshal([]byte(frontMatter), &frontData); err != nil {
|
||||
return false, nil
|
||||
}
|
||||
storedHash := extractSourceHash(frontData)
|
||||
if storedHash == "" {
|
||||
return false, nil
|
||||
}
|
||||
return strings.EqualFold(storedHash, sourceHash), nil
|
||||
}
|
||||
|
||||
// extractSourceHash digs the x-i18n.source_hash string out of parsed
// frontmatter, returning "" when either level is absent or mistyped.
func extractSourceHash(frontData map[string]any) string {
	if xi, ok := frontData["x-i18n"].(map[string]any); ok {
		if value, ok := xi["source_hash"].(string); ok {
			return strings.TrimSpace(value)
		}
	}
	return ""
}
|
||||
|
||||
// resolveDocsPath converts filePath to its absolute form and its path
// relative to docsRoot, rejecting anything that resolves to the root itself
// or escapes it.
func resolveDocsPath(docsRoot, filePath string) (string, string, error) {
	absPath, err := filepath.Abs(filePath)
	if err != nil {
		return "", "", err
	}
	relPath, err := filepath.Rel(docsRoot, absPath)
	if err != nil {
		return "", "", err
	}
	switch {
	case relPath == "." || relPath == "":
		return "", "", fmt.Errorf("file %s resolves to docs root %s", absPath, docsRoot)
	case filepath.IsAbs(relPath),
		relPath == "..",
		strings.HasPrefix(relPath, ".."+string(filepath.Separator)):
		return "", "", fmt.Errorf("file %s not under docs root %s", absPath, docsRoot)
	}
	return absPath, relPath, nil
}
|
||||
29
openclaw/scripts/docs-i18n/glossary.go
Normal file
29
openclaw/scripts/docs-i18n/glossary.go
Normal file
@@ -0,0 +1,29 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"os"
|
||||
)
|
||||
|
||||
// GlossaryEntry maps one source-language term to its mandated
// target-language translation.
type GlossaryEntry struct {
	Source string `json:"source"`
	Target string `json:"target"`
}
|
||||
|
||||
func LoadGlossary(path string) ([]GlossaryEntry, error) {
|
||||
data, err := os.ReadFile(path)
|
||||
if err != nil {
|
||||
if errors.Is(err, os.ErrNotExist) {
|
||||
return nil, nil
|
||||
}
|
||||
return nil, err
|
||||
}
|
||||
var entries []GlossaryEntry
|
||||
if err := json.Unmarshal(data, &entries); err != nil {
|
||||
return nil, fmt.Errorf("glossary parse failed: %w", err)
|
||||
}
|
||||
|
||||
return entries, nil
|
||||
}
|
||||
10
openclaw/scripts/docs-i18n/go.mod
Normal file
10
openclaw/scripts/docs-i18n/go.mod
Normal file
@@ -0,0 +1,10 @@
|
||||
module github.com/openclaw/openclaw/scripts/docs-i18n
|
||||
|
||||
go 1.24.0
|
||||
|
||||
require (
|
||||
github.com/joshp123/pi-golang v0.0.4
|
||||
github.com/yuin/goldmark v1.7.8
|
||||
golang.org/x/net v0.50.0
|
||||
gopkg.in/yaml.v3 v3.0.1
|
||||
)
|
||||
10
openclaw/scripts/docs-i18n/go.sum
Normal file
10
openclaw/scripts/docs-i18n/go.sum
Normal file
@@ -0,0 +1,10 @@
|
||||
github.com/joshp123/pi-golang v0.0.4 h1:82HISyKNN8bIl2lvAd65462LVCQIsjhaUFQxyQgg5Xk=
|
||||
github.com/joshp123/pi-golang v0.0.4/go.mod h1:9mHEQkeJELYzubXU3b86/T8yedI/iAOKx0Tz0c41qes=
|
||||
github.com/yuin/goldmark v1.7.8 h1:iERMLn0/QJeHFhxSt3p6PeN9mGnvIKSpG9YYorDMnic=
|
||||
github.com/yuin/goldmark v1.7.8/go.mod h1:uzxRWxtg69N339t3louHJ7+O03ezfj6PlliRlaOzY1E=
|
||||
golang.org/x/net v0.50.0 h1:ucWh9eiCGyDR3vtzso0WMQinm2Dnt8cFMuQa9K33J60=
|
||||
golang.org/x/net v0.50.0/go.mod h1:UgoSli3F/pBgdJBHCTc+tp3gmrU4XswgGRgtnwWTfyM=
|
||||
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
|
||||
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
||||
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
|
||||
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
||||
160
openclaw/scripts/docs-i18n/html_translate.go
Normal file
160
openclaw/scripts/docs-i18n/html_translate.go
Normal file
@@ -0,0 +1,160 @@
|
||||
package main
|
||||
|
||||
import (
	"context"
	"io"
	"sort"
	"strings"

	"github.com/yuin/goldmark"
	"github.com/yuin/goldmark/ast"
	"github.com/yuin/goldmark/extension"
	"github.com/yuin/goldmark/text"
	"golang.org/x/net/html"
)
|
||||
|
||||
// htmlReplacement records a byte span [Start, Stop) of the markdown body
// whose original text should be replaced by Value (the translated HTML).
type htmlReplacement struct {
	Start int
	Stop  int
	Value string
}
|
||||
|
||||
func translateHTMLBlocks(ctx context.Context, translator *PiTranslator, body, srcLang, tgtLang string) (string, error) {
|
||||
source := []byte(body)
|
||||
r := text.NewReader(source)
|
||||
md := goldmark.New(
|
||||
goldmark.WithExtensions(extension.GFM),
|
||||
)
|
||||
doc := md.Parser().Parse(r)
|
||||
|
||||
replacements := make([]htmlReplacement, 0, 8)
|
||||
|
||||
_ = ast.Walk(doc, func(n ast.Node, entering bool) (ast.WalkStatus, error) {
|
||||
if !entering {
|
||||
return ast.WalkContinue, nil
|
||||
}
|
||||
block, ok := n.(*ast.HTMLBlock)
|
||||
if !ok {
|
||||
return ast.WalkContinue, nil
|
||||
}
|
||||
start, stop, ok := htmlBlockSpan(block, source)
|
||||
if !ok {
|
||||
return ast.WalkSkipChildren, nil
|
||||
}
|
||||
htmlText := string(source[start:stop])
|
||||
translated, err := translateHTMLBlock(ctx, translator, htmlText, srcLang, tgtLang)
|
||||
if err != nil {
|
||||
return ast.WalkStop, err
|
||||
}
|
||||
replacements = append(replacements, htmlReplacement{Start: start, Stop: stop, Value: translated})
|
||||
return ast.WalkSkipChildren, nil
|
||||
})
|
||||
|
||||
if len(replacements) == 0 {
|
||||
return body, nil
|
||||
}
|
||||
|
||||
return applyHTMLReplacements(body, replacements), nil
|
||||
}
|
||||
|
||||
func htmlBlockSpan(block *ast.HTMLBlock, source []byte) (int, int, bool) {
|
||||
lines := block.Lines()
|
||||
if lines.Len() == 0 {
|
||||
return 0, 0, false
|
||||
}
|
||||
start := lines.At(0).Start
|
||||
stop := lines.At(lines.Len() - 1).Stop
|
||||
if start >= stop {
|
||||
return 0, 0, false
|
||||
}
|
||||
return start, stop, true
|
||||
}
|
||||
|
||||
func applyHTMLReplacements(body string, replacements []htmlReplacement) string {
|
||||
if len(replacements) == 0 {
|
||||
return body
|
||||
}
|
||||
sortHTMLReplacements(replacements)
|
||||
var out strings.Builder
|
||||
last := 0
|
||||
for _, rep := range replacements {
|
||||
if rep.Start < last {
|
||||
continue
|
||||
}
|
||||
out.WriteString(body[last:rep.Start])
|
||||
out.WriteString(rep.Value)
|
||||
last = rep.Stop
|
||||
}
|
||||
out.WriteString(body[last:])
|
||||
return out.String()
|
||||
}
|
||||
|
||||
func sortHTMLReplacements(replacements []htmlReplacement) {
|
||||
sort.Slice(replacements, func(i, j int) bool {
|
||||
return replacements[i].Start < replacements[j].Start
|
||||
})
|
||||
}
|
||||
|
||||
// translateHTMLBlock runs an HTML tokenizer over one raw HTML block and
// translates only its text nodes, leaving tags and attributes untouched.
// Text inside code/pre/script/style elements is never translated. The raw
// token bytes are echoed for everything else so formatting survives intact.
func translateHTMLBlock(ctx context.Context, translator *PiTranslator, htmlText, srcLang, tgtLang string) (string, error) {
	tokenizer := html.NewTokenizer(strings.NewReader(htmlText))
	var out strings.Builder
	// skipDepth counts how many skip-tags (see isSkipTag) we are currently
	// nested inside; text is only translated at depth 0.
	skipDepth := 0

	for {
		tt := tokenizer.Next()
		if tt == html.ErrorToken {
			// io.EOF means all tokens were consumed; anything else is a real
			// tokenizer error.
			if err := tokenizer.Err(); err != nil && err != io.EOF {
				return "", err
			}
			break
		}

		raw := string(tokenizer.Raw())
		tok := tokenizer.Token()

		switch tt {
		case html.StartTagToken:
			out.WriteString(raw)
			if isSkipTag(strings.ToLower(tok.Data)) {
				skipDepth++
			}
		case html.EndTagToken:
			out.WriteString(raw)
			// Guard against unbalanced markup driving the depth negative.
			if isSkipTag(strings.ToLower(tok.Data)) && skipDepth > 0 {
				skipDepth--
			}
		case html.SelfClosingTagToken:
			out.WriteString(raw)
		case html.TextToken:
			if shouldTranslateHTMLText(skipDepth, raw) {
				translated, err := translator.Translate(ctx, raw, srcLang, tgtLang)
				if err != nil {
					return "", err
				}
				out.WriteString(translated)
			} else {
				out.WriteString(raw)
			}
		default:
			// Comments, doctype, etc.: pass through verbatim.
			out.WriteString(raw)
		}
	}

	return out.String(), nil
}
|
||||
|
||||
// shouldTranslateHTMLText reports whether a text token should be sent to
// the translator: only non-blank text outside any skip-tag qualifies.
func shouldTranslateHTMLText(skipDepth int, text string) bool {
	return skipDepth == 0 && strings.TrimSpace(text) != ""
}
|
||||
|
||||
// isSkipTag reports whether an element's contents must never be translated.
func isSkipTag(tag string) bool {
	return tag == "code" || tag == "pre" || tag == "script" || tag == "style"
}
|
||||
273
openclaw/scripts/docs-i18n/main.go
Normal file
273
openclaw/scripts/docs-i18n/main.go
Normal file
@@ -0,0 +1,273 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"context"
|
||||
"flag"
|
||||
"fmt"
|
||||
"log"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"sync"
|
||||
"time"
|
||||
)
|
||||
|
||||
// docJob is one unit of work for a doc-mode worker.
type docJob struct {
	index int    // 1-based position in the ordered queue (for logging)
	path  string // path exactly as supplied on the command line
	rel   string // path relative to the docs root (for logging)
}
|
||||
|
||||
// docResult reports the outcome of translating one document.
type docResult struct {
	index    int           // matching docJob.index; zero for worker-setup failures
	rel      string        // doc path relative to the docs root
	duration time.Duration // wall-clock time spent on the document
	skipped  bool          // true when the existing output was kept
	err      error         // non-nil on failure
}
|
||||
|
||||
// main drives the docs translation CLI. It parses flags, loads the glossary
// and translation memory for the target language, orders the input files
// deterministically, then dispatches to doc mode (whole-file translation,
// optionally parallel) or segment mode (per-segment translation backed by a
// translation memory).
func main() {
	var (
		targetLang = flag.String("lang", "zh-CN", "target language (e.g., zh-CN)")
		sourceLang = flag.String("src", "en", "source language")
		docsRoot   = flag.String("docs", "docs", "docs root")
		tmPath     = flag.String("tm", "", "translation memory path")
		mode       = flag.String("mode", "segment", "translation mode (segment|doc)")
		thinking   = flag.String("thinking", "high", "thinking level (low|high)")
		overwrite  = flag.Bool("overwrite", false, "overwrite existing translations")
		maxFiles   = flag.Int("max", 0, "max files to process (0 = all)")
		parallel   = flag.Int("parallel", 1, "parallel workers for doc mode")
	)
	flag.Parse()
	files := flag.Args()
	if len(files) == 0 {
		fatal(fmt.Errorf("no doc files provided"))
	}

	resolvedDocsRoot, err := filepath.Abs(*docsRoot)
	if err != nil {
		fatal(err)
	}

	// Default the translation memory to <docs>/.i18n/<lang>.tm.jsonl.
	if *tmPath == "" {
		*tmPath = filepath.Join(resolvedDocsRoot, ".i18n", fmt.Sprintf("%s.tm.jsonl", *targetLang))
	}

	// A missing glossary file is fine: LoadGlossary returns nil entries.
	glossaryPath := filepath.Join(resolvedDocsRoot, ".i18n", fmt.Sprintf("glossary.%s.json", *targetLang))
	glossary, err := LoadGlossary(glossaryPath)
	if err != nil {
		fatal(err)
	}

	translator, err := NewPiTranslator(*sourceLang, *targetLang, glossary, *thinking)
	if err != nil {
		fatal(err)
	}
	defer translator.Close()

	tm, err := LoadTranslationMemory(*tmPath)
	if err != nil {
		fatal(err)
	}

	ordered, err := orderFiles(resolvedDocsRoot, files)
	if err != nil {
		fatal(err)
	}
	totalFiles := len(ordered)
	// In doc mode (without -overwrite), pre-filter files whose translations
	// are already current so the progress log reflects real pending work.
	preSkipped := 0
	if *mode == "doc" && !*overwrite {
		filtered, skipped, err := filterDocQueue(resolvedDocsRoot, *targetLang, ordered)
		if err != nil {
			fatal(err)
		}
		ordered = filtered
		preSkipped = skipped
	}
	if *maxFiles > 0 && *maxFiles < len(ordered) {
		ordered = ordered[:*maxFiles]
	}

	log.SetFlags(log.LstdFlags)
	start := time.Now()
	processed := 0
	skipped := 0

	if *parallel < 1 {
		*parallel = 1
	}

	log.Printf("docs-i18n: mode=%s total=%d pending=%d pre_skipped=%d overwrite=%t thinking=%s parallel=%d", *mode, totalFiles, len(ordered), preSkipped, *overwrite, *thinking, *parallel)
	switch *mode {
	case "doc":
		if *parallel > 1 {
			// Parallel workers each create their own translator; the shared
			// translator created above is unused on this path.
			proc, skip, err := runDocParallel(context.Background(), ordered, resolvedDocsRoot, *sourceLang, *targetLang, *overwrite, *parallel, glossary, *thinking)
			if err != nil {
				fatal(err)
			}
			processed += proc
			skipped += skip
		} else {
			proc, skip, err := runDocSequential(context.Background(), ordered, translator, resolvedDocsRoot, *sourceLang, *targetLang, *overwrite)
			if err != nil {
				fatal(err)
			}
			processed += proc
			skipped += skip
		}
	case "segment":
		if *parallel > 1 {
			fatal(fmt.Errorf("parallel processing is only supported in doc mode"))
		}
		proc, err := runSegmentSequential(context.Background(), ordered, translator, tm, resolvedDocsRoot, *sourceLang, *targetLang)
		if err != nil {
			fatal(err)
		}
		processed += proc
	default:
		fatal(fmt.Errorf("unknown mode: %s", *mode))
	}

	// Persist any new translation-memory entries before reporting.
	if err := tm.Save(); err != nil {
		fatal(err)
	}
	elapsed := time.Since(start).Round(time.Millisecond)
	log.Printf("docs-i18n: completed processed=%d skipped=%d elapsed=%s", processed, skipped, elapsed)
}
|
||||
|
||||
func runDocSequential(ctx context.Context, ordered []string, translator *PiTranslator, docsRoot, srcLang, tgtLang string, overwrite bool) (int, int, error) {
|
||||
processed := 0
|
||||
skipped := 0
|
||||
for index, file := range ordered {
|
||||
relPath := resolveRelPath(docsRoot, file)
|
||||
log.Printf("docs-i18n: [%d/%d] start %s", index+1, len(ordered), relPath)
|
||||
start := time.Now()
|
||||
skip, err := processFileDoc(ctx, translator, docsRoot, file, srcLang, tgtLang, overwrite)
|
||||
if err != nil {
|
||||
return processed, skipped, err
|
||||
}
|
||||
if skip {
|
||||
skipped++
|
||||
log.Printf("docs-i18n: [%d/%d] skipped %s (%s)", index+1, len(ordered), relPath, time.Since(start).Round(time.Millisecond))
|
||||
} else {
|
||||
processed++
|
||||
log.Printf("docs-i18n: [%d/%d] done %s (%s)", index+1, len(ordered), relPath, time.Since(start).Round(time.Millisecond))
|
||||
}
|
||||
}
|
||||
return processed, skipped, nil
|
||||
}
|
||||
|
||||
func runDocParallel(ctx context.Context, ordered []string, docsRoot, srcLang, tgtLang string, overwrite bool, parallel int, glossary []GlossaryEntry, thinking string) (int, int, error) {
|
||||
jobs := make(chan docJob)
|
||||
results := make(chan docResult, len(ordered))
|
||||
ctx, cancel := context.WithCancel(ctx)
|
||||
defer cancel()
|
||||
|
||||
var wg sync.WaitGroup
|
||||
for worker := 0; worker < parallel; worker++ {
|
||||
wg.Add(1)
|
||||
go func(workerID int) {
|
||||
defer wg.Done()
|
||||
translator, err := NewPiTranslator(srcLang, tgtLang, glossary, thinking)
|
||||
if err != nil {
|
||||
results <- docResult{err: err}
|
||||
return
|
||||
}
|
||||
defer translator.Close()
|
||||
for job := range jobs {
|
||||
if ctx.Err() != nil {
|
||||
return
|
||||
}
|
||||
log.Printf("docs-i18n: [w%d %d/%d] start %s", workerID, job.index, len(ordered), job.rel)
|
||||
start := time.Now()
|
||||
skip, err := processFileDoc(ctx, translator, docsRoot, job.path, srcLang, tgtLang, overwrite)
|
||||
results <- docResult{
|
||||
index: job.index,
|
||||
rel: job.rel,
|
||||
duration: time.Since(start),
|
||||
skipped: skip,
|
||||
err: err,
|
||||
}
|
||||
if err != nil {
|
||||
cancel()
|
||||
return
|
||||
}
|
||||
}
|
||||
}(worker + 1)
|
||||
}
|
||||
|
||||
go func() {
|
||||
for index, file := range ordered {
|
||||
jobs <- docJob{index: index + 1, path: file, rel: resolveRelPath(docsRoot, file)}
|
||||
}
|
||||
close(jobs)
|
||||
}()
|
||||
|
||||
processed := 0
|
||||
skipped := 0
|
||||
for i := 0; i < len(ordered); i++ {
|
||||
result := <-results
|
||||
if result.err != nil {
|
||||
wg.Wait()
|
||||
return processed, skipped, result.err
|
||||
}
|
||||
if result.skipped {
|
||||
skipped++
|
||||
log.Printf("docs-i18n: [w* %d/%d] skipped %s (%s)", result.index, len(ordered), result.rel, result.duration.Round(time.Millisecond))
|
||||
} else {
|
||||
processed++
|
||||
log.Printf("docs-i18n: [w* %d/%d] done %s (%s)", result.index, len(ordered), result.rel, result.duration.Round(time.Millisecond))
|
||||
}
|
||||
}
|
||||
wg.Wait()
|
||||
return processed, skipped, nil
|
||||
}
|
||||
|
||||
func runSegmentSequential(ctx context.Context, ordered []string, translator *PiTranslator, tm *TranslationMemory, docsRoot, srcLang, tgtLang string) (int, error) {
|
||||
processed := 0
|
||||
for index, file := range ordered {
|
||||
relPath := resolveRelPath(docsRoot, file)
|
||||
log.Printf("docs-i18n: [%d/%d] start %s", index+1, len(ordered), relPath)
|
||||
start := time.Now()
|
||||
if _, err := processFile(ctx, translator, tm, docsRoot, file, srcLang, tgtLang); err != nil {
|
||||
return processed, err
|
||||
}
|
||||
processed++
|
||||
log.Printf("docs-i18n: [%d/%d] done %s (%s)", index+1, len(ordered), relPath, time.Since(start).Round(time.Millisecond))
|
||||
}
|
||||
return processed, nil
|
||||
}
|
||||
|
||||
func resolveRelPath(docsRoot, file string) string {
|
||||
relPath := file
|
||||
if _, rel, err := resolveDocsPath(docsRoot, file); err == nil {
|
||||
relPath = rel
|
||||
}
|
||||
return relPath
|
||||
}
|
||||
|
||||
func filterDocQueue(docsRoot, targetLang string, ordered []string) ([]string, int, error) {
|
||||
pending := make([]string, 0, len(ordered))
|
||||
skipped := 0
|
||||
for _, file := range ordered {
|
||||
absPath, relPath, err := resolveDocsPath(docsRoot, file)
|
||||
if err != nil {
|
||||
return nil, skipped, err
|
||||
}
|
||||
content, err := os.ReadFile(absPath)
|
||||
if err != nil {
|
||||
return nil, skipped, err
|
||||
}
|
||||
sourceHash := hashBytes(content)
|
||||
outputPath := filepath.Join(docsRoot, targetLang, relPath)
|
||||
skip, err := shouldSkipDoc(outputPath, sourceHash)
|
||||
if err != nil {
|
||||
return nil, skipped, err
|
||||
}
|
||||
if skip {
|
||||
skipped++
|
||||
continue
|
||||
}
|
||||
pending = append(pending, file)
|
||||
}
|
||||
return pending, skipped, nil
|
||||
}
|
||||
131
openclaw/scripts/docs-i18n/markdown_segments.go
Normal file
131
openclaw/scripts/docs-i18n/markdown_segments.go
Normal file
@@ -0,0 +1,131 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"sort"
|
||||
"strings"
|
||||
|
||||
"github.com/yuin/goldmark"
|
||||
"github.com/yuin/goldmark/ast"
|
||||
"github.com/yuin/goldmark/extension"
|
||||
"github.com/yuin/goldmark/text"
|
||||
)
|
||||
|
||||
// extractSegments parses a markdown body and returns its translatable text
// segments: text inside paragraphs, headings and list items, skipping code
// blocks, code spans and raw HTML. Adjacent text nodes of the same block
// separated only by whitespace are merged into one segment. Each segment
// carries its byte span, text, hash, and a stable ID derived from relPath.
func extractSegments(body, relPath string) ([]Segment, error) {
	source := []byte(body)
	r := text.NewReader(source)
	md := goldmark.New(
		goldmark.WithExtensions(extension.GFM),
	)
	doc := md.Parser().Parse(r)

	segments := make([]Segment, 0, 128)
	// skipDepth > 0 while inside any non-translatable node (code / raw HTML).
	skipDepth := 0
	var lastBlock ast.Node

	err := ast.Walk(doc, func(n ast.Node, entering bool) (ast.WalkStatus, error) {
		switch n.(type) {
		case *ast.CodeBlock, *ast.FencedCodeBlock, *ast.CodeSpan, *ast.HTMLBlock, *ast.RawHTML:
			if entering {
				skipDepth++
			} else {
				skipDepth--
			}
			return ast.WalkContinue, nil
		}

		if !entering || skipDepth > 0 {
			return ast.WalkContinue, nil
		}

		textNode, ok := n.(*ast.Text)
		if !ok {
			return ast.WalkContinue, nil
		}
		// Only text inside a translatable block (paragraph/heading/list item)
		// is collected.
		block := blockParent(textNode)
		if block == nil {
			return ast.WalkContinue, nil
		}
		textValue := string(textNode.Segment.Value(source))
		if strings.TrimSpace(textValue) == "" {
			return ast.WalkContinue, nil
		}

		start := textNode.Segment.Start
		stop := textNode.Segment.Stop
		// Merge with the previous segment when both belong to the same block
		// and only whitespace (soft line breaks) separates them.
		if len(segments) > 0 && lastBlock == block {
			last := &segments[len(segments)-1]
			gap := string(source[last.Stop:start])
			if strings.TrimSpace(gap) == "" {
				last.Stop = stop
				return ast.WalkContinue, nil
			}
		}

		segments = append(segments, Segment{Start: start, Stop: stop})
		lastBlock = block
		return ast.WalkContinue, nil
	})
	if err != nil {
		return nil, err
	}

	// Second pass: materialize text, hashes and IDs for the merged spans.
	filtered := make([]Segment, 0, len(segments))
	for _, seg := range segments {
		textValue := string(source[seg.Start:seg.Stop])
		trimmed := strings.TrimSpace(textValue)
		if trimmed == "" {
			continue
		}
		textHash := hashText(textValue)
		segmentID := segmentID(relPath, textHash)
		filtered = append(filtered, Segment{
			Start:     seg.Start,
			Stop:      seg.Stop,
			Text:      textValue,
			TextHash:  textHash,
			SegmentID: segmentID,
		})
	}

	// Keep segments in document order for applyTranslations.
	sort.Slice(filtered, func(i, j int) bool {
		return filtered[i].Start < filtered[j].Start
	})

	return filtered, nil
}
|
||||
|
||||
func blockParent(n ast.Node) ast.Node {
|
||||
for node := n.Parent(); node != nil; node = node.Parent() {
|
||||
if isTranslatableBlock(node) {
|
||||
return node
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func isTranslatableBlock(n ast.Node) bool {
|
||||
switch n.(type) {
|
||||
case *ast.Paragraph, *ast.Heading, *ast.ListItem:
|
||||
return true
|
||||
default:
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
func applyTranslations(body string, segments []Segment) string {
|
||||
if len(segments) == 0 {
|
||||
return body
|
||||
}
|
||||
var out strings.Builder
|
||||
last := 0
|
||||
for _, seg := range segments {
|
||||
if seg.Start < last {
|
||||
continue
|
||||
}
|
||||
out.WriteString(body[last:seg.Start])
|
||||
out.WriteString(seg.Translated)
|
||||
last = seg.Stop
|
||||
}
|
||||
out.WriteString(body[last:])
|
||||
return out.String()
|
||||
}
|
||||
89
openclaw/scripts/docs-i18n/masking.go
Normal file
89
openclaw/scripts/docs-i18n/masking.go
Normal file
@@ -0,0 +1,89 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"regexp"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// Regexes for markdown spans that must be masked with placeholders before
// translation (inline code, angle-bracket autolinks, link URLs), plus the
// pattern of the placeholders themselves.
var (
	inlineCodeRe  = regexp.MustCompile("`[^`]+`")               // `inline code`
	angleLinkRe   = regexp.MustCompile(`<https?://[^>]+>`)      // <https://autolink>
	linkURLRe     = regexp.MustCompile(`\[[^\]]*\]\(([^)]+)\)`) // [text](url); group 1 = url
	placeholderRe = regexp.MustCompile(`__OC_I18N_\d+__`)
)
|
||||
|
||||
func maskMarkdown(text string, nextPlaceholder func() string, placeholders *[]string, mapping map[string]string) string {
|
||||
masked := maskMatches(text, inlineCodeRe, nextPlaceholder, placeholders, mapping)
|
||||
masked = maskMatches(masked, angleLinkRe, nextPlaceholder, placeholders, mapping)
|
||||
masked = maskLinkURLs(masked, nextPlaceholder, placeholders, mapping)
|
||||
return masked
|
||||
}
|
||||
|
||||
// maskMatches substitutes every non-overlapping match of re in text with a
// fresh placeholder, recording each placeholder's original text in mapping
// and its issue order in *placeholders.
func maskMatches(text string, re *regexp.Regexp, nextPlaceholder func() string, placeholders *[]string, mapping map[string]string) string {
	spans := re.FindAllStringIndex(text, -1)
	if len(spans) == 0 {
		return text
	}
	var b strings.Builder
	cursor := 0
	for _, span := range spans {
		start, end := span[0], span[1]
		if start < cursor {
			continue // overlaps a span already masked
		}
		b.WriteString(text[cursor:start])
		ph := nextPlaceholder()
		mapping[ph] = text[start:end]
		*placeholders = append(*placeholders, ph)
		b.WriteString(ph)
		cursor = end
	}
	b.WriteString(text[cursor:])
	return b.String()
}
|
||||
|
||||
func maskLinkURLs(text string, nextPlaceholder func() string, placeholders *[]string, mapping map[string]string) string {
|
||||
matches := linkURLRe.FindAllStringSubmatchIndex(text, -1)
|
||||
if len(matches) == 0 {
|
||||
return text
|
||||
}
|
||||
var out strings.Builder
|
||||
pos := 0
|
||||
for _, span := range matches {
|
||||
fullStart := span[0]
|
||||
urlStart, urlEnd := span[2], span[3]
|
||||
if urlStart < 0 || urlEnd < 0 {
|
||||
continue
|
||||
}
|
||||
if fullStart < pos {
|
||||
continue
|
||||
}
|
||||
out.WriteString(text[pos:urlStart])
|
||||
placeholder := nextPlaceholder()
|
||||
mapping[placeholder] = text[urlStart:urlEnd]
|
||||
*placeholders = append(*placeholders, placeholder)
|
||||
out.WriteString(placeholder)
|
||||
pos = urlEnd
|
||||
}
|
||||
out.WriteString(text[pos:])
|
||||
return out.String()
|
||||
}
|
||||
|
||||
// unmaskMarkdown restores every placeholder in text back to its original
// masked content, using the placeholder list and mapping produced by the
// mask* helpers.
func unmaskMarkdown(text string, placeholders []string, mapping map[string]string) string {
	restored := text
	for i := range placeholders {
		token := placeholders[i]
		restored = strings.ReplaceAll(restored, token, mapping[token])
	}
	return restored
}
|
||||
|
||||
// validatePlaceholders verifies that the translated text still contains
// every placeholder that was inserted before translation. It returns an
// error naming the first placeholder the model dropped, or nil.
func validatePlaceholders(text string, placeholders []string) error {
	for _, token := range placeholders {
		if strings.Contains(text, token) {
			continue
		}
		return fmt.Errorf("placeholder missing: %s", token)
	}
	return nil
}
|
||||
37
openclaw/scripts/docs-i18n/order.go
Normal file
37
openclaw/scripts/docs-i18n/order.go
Normal file
@@ -0,0 +1,37 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"path/filepath"
|
||||
"sort"
|
||||
)
|
||||
|
||||
// orderedFile pairs a caller-supplied file path with the key used to sort it.
type orderedFile struct {
	path string // the path exactly as provided by the caller
	rel  string // sort key: path relative to docsRoot, or absolute on Rel failure
}
|
||||
|
||||
// orderFiles returns the given file paths sorted deterministically by their
// path relative to docsRoot (falling back to the absolute path when a
// relative path cannot be computed). The original path strings are returned
// unchanged; only their order differs. A nil/empty input yields (nil, nil).
func orderFiles(docsRoot string, files []string) ([]string, error) {
	if len(files) == 0 {
		return nil, nil
	}
	type keyed struct {
		original string
		sortKey  string
	}
	keyedFiles := make([]keyed, 0, len(files))
	for _, file := range files {
		abs, err := filepath.Abs(file)
		if err != nil {
			return nil, err
		}
		key := abs
		// Prefer the docsRoot-relative form as the sort key when available.
		if rel, relErr := filepath.Rel(docsRoot, abs); relErr == nil {
			key = rel
		}
		keyedFiles = append(keyedFiles, keyed{original: file, sortKey: key})
	}
	sort.Slice(keyedFiles, func(i, j int) bool {
		return keyedFiles[i].sortKey < keyedFiles[j].sortKey
	})
	ordered := make([]string, len(keyedFiles))
	for i, kf := range keyedFiles {
		ordered[i] = kf.original
	}
	return ordered, nil
}
|
||||
30
openclaw/scripts/docs-i18n/placeholders.go
Normal file
30
openclaw/scripts/docs-i18n/placeholders.go
Normal file
@@ -0,0 +1,30 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
)
|
||||
|
||||
// PlaceholderState generates unique placeholder tokens for masking, avoiding
// collisions with any placeholder-like strings already present in the input.
type PlaceholderState struct {
	counter int                 // next numeric suffix to try
	used    map[string]struct{} // tokens already present in the text or already issued
}
|
||||
|
||||
// NewPlaceholderState scans text for anything matching the package-level
// placeholderRe pattern and pre-marks those tokens as used, so freshly
// generated placeholders can never collide with placeholder-like strings
// that already occur in the source document. The counter starts at 900000
// to keep generated numbers visually distinct.
func NewPlaceholderState(text string) *PlaceholderState {
	used := map[string]struct{}{}
	for _, hit := range placeholderRe.FindAllString(text, -1) {
		used[hit] = struct{}{}
	}
	return &PlaceholderState{counter: 900000, used: used}
}
|
||||
|
||||
func (s *PlaceholderState) Next() string {
|
||||
for {
|
||||
candidate := fmt.Sprintf("__OC_I18N_%d__", s.counter)
|
||||
s.counter++
|
||||
if _, ok := s.used[candidate]; ok {
|
||||
continue
|
||||
}
|
||||
s.used[candidate] = struct{}{}
|
||||
return candidate
|
||||
}
|
||||
}
|
||||
202
openclaw/scripts/docs-i18n/process.go
Normal file
202
openclaw/scripts/docs-i18n/process.go
Normal file
@@ -0,0 +1,202 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"gopkg.in/yaml.v3"
|
||||
)
|
||||
|
||||
// processFile translates a single source document into docsRoot/<tgtLang>/<rel>.
// The pipeline: read the file, translate frontmatter fields, translate HTML
// blocks, segment the Markdown body, translate each segment (consulting the
// translation memory first), then reassemble and write the output.
// The boolean result mirrors the "skipped" convention used by the doc-mode
// variant; this implementation never skips and always returns false on
// success.
func processFile(ctx context.Context, translator *PiTranslator, tm *TranslationMemory, docsRoot, filePath, srcLang, tgtLang string) (bool, error) {
	absPath, relPath, err := resolveDocsPath(docsRoot, filePath)
	if err != nil {
		return false, err
	}

	content, err := os.ReadFile(absPath)
	if err != nil {
		return false, err
	}

	// Split YAML frontmatter (if any) from the Markdown body.
	frontMatter, body := splitFrontMatter(string(content))
	frontData := map[string]any{}
	if frontMatter != "" {
		if err := yaml.Unmarshal([]byte(frontMatter), &frontData); err != nil {
			return false, fmt.Errorf("frontmatter parse failed for %s: %w", relPath, err)
		}
	}

	// Translate selected frontmatter values in place (summary/title/read_when).
	if err := translateFrontMatter(ctx, translator, tm, frontData, relPath, srcLang, tgtLang); err != nil {
		return false, err
	}

	body, err = translateHTMLBlocks(ctx, translator, body, srcLang, tgtLang)
	if err != nil {
		return false, err
	}

	segments, err := extractSegments(body, relPath)
	if err != nil {
		return false, err
	}

	namespace := cacheNamespace()
	for i := range segments {
		// Take a pointer so translation results are stored back into the slice.
		seg := &segments[i]
		seg.CacheKey = cacheKey(namespace, srcLang, tgtLang, seg.SegmentID, seg.TextHash)
		// Translation-memory hit: reuse the cached translation and skip the model.
		if entry, ok := tm.Get(seg.CacheKey); ok {
			seg.Translated = entry.Translated
			continue
		}
		translated, err := translator.Translate(ctx, seg.Text, srcLang, tgtLang)
		if err != nil {
			return false, fmt.Errorf("translate failed (%s): %w", relPath, err)
		}
		seg.Translated = translated
		// Record the fresh translation so reruns become cache hits.
		entry := TMEntry{
			CacheKey:   seg.CacheKey,
			SegmentID:  seg.SegmentID,
			SourcePath: relPath,
			TextHash:   seg.TextHash,
			Text:       seg.Text,
			Translated: translated,
			Provider:   providerName,
			Model:      modelVersion,
			SrcLang:    srcLang,
			TgtLang:    tgtLang,
			UpdatedAt:  time.Now().UTC().Format(time.RFC3339),
		}
		tm.Put(entry)
	}

	translatedBody := applyTranslations(body, segments)
	// Re-encode frontmatter with provenance metadata (source hash, model, …).
	updatedFront, err := encodeFrontMatter(frontData, relPath, content)
	if err != nil {
		return false, err
	}

	outputPath := filepath.Join(docsRoot, tgtLang, relPath)
	if err := os.MkdirAll(filepath.Dir(outputPath), 0o755); err != nil {
		return false, err
	}

	output := updatedFront + translatedBody
	return false, os.WriteFile(outputPath, []byte(output), 0o644)
}
|
||||
|
||||
// splitFrontMatter splits a document into its YAML frontmatter (the lines
// between an opening "---" line and the next "---" line) and the remaining
// body. When the document has no valid frontmatter, it returns ("", content)
// unchanged.
//
// FIX: the previous version only checked strings.HasPrefix(content, "---"),
// which accepted bogus opening delimiters such as "----" or "--- title" and
// misparsed such documents as frontmatter. The opening delimiter must now be
// a line consisting of exactly "---" (allowing trailing whitespace/CR).
func splitFrontMatter(content string) (string, string) {
	lines := strings.Split(content, "\n")
	if len(lines) < 2 || strings.TrimRight(lines[0], " \t\r") != "---" {
		return "", content
	}
	// Find the closing delimiter line.
	endIndex := -1
	for i := 1; i < len(lines); i++ {
		if strings.TrimSpace(lines[i]) == "---" {
			endIndex = i
			break
		}
	}
	if endIndex == -1 {
		return "", content
	}
	front := strings.Join(lines[1:endIndex], "\n")
	body := strings.Join(lines[endIndex+1:], "\n")
	// Drop a single leading blank line conventionally left after the delimiter.
	body = strings.TrimPrefix(body, "\n")
	return front, body
}
|
||||
|
||||
// encodeFrontMatter serializes frontData back to a "---"-fenced YAML block,
// after stamping an "x-i18n" provenance map (source path/hash, provider,
// model, workflow version, generation time) so later runs can detect stale
// translations. The returned string ends with a blank line ready to be
// concatenated with the translated body.
func encodeFrontMatter(frontData map[string]any, relPath string, source []byte) (string, error) {
	if frontData == nil {
		frontData = map[string]any{}
	}
	frontData["x-i18n"] = map[string]any{
		"source_path":  relPath,
		"source_hash":  hashBytes(source),
		"provider":     providerName,
		"model":        modelVersion,
		"workflow":     workflowVersion,
		"generated_at": time.Now().UTC().Format(time.RFC3339),
	}
	encoded, err := yaml.Marshal(frontData)
	if err != nil {
		return "", err
	}
	// yaml.Marshal output already ends with "\n", so the closing fence
	// lands on its own line.
	return fmt.Sprintf("---\n%s---\n\n", string(encoded)), nil
}
|
||||
|
||||
// translateFrontMatter translates the human-readable frontmatter fields in
// place: "summary", "title", and each string item of "read_when". Fields of
// unexpected types are left untouched. Each value is translated via
// translateSnippet, which consults the translation memory first.
func translateFrontMatter(ctx context.Context, translator *PiTranslator, tm *TranslationMemory, data map[string]any, relPath, srcLang, tgtLang string) error {
	if len(data) == 0 {
		return nil
	}
	if summary, ok := data["summary"].(string); ok {
		translated, err := translateSnippet(ctx, translator, tm, relPath+":frontmatter:summary", summary, srcLang, tgtLang)
		if err != nil {
			return err
		}
		data["summary"] = translated
	}
	if title, ok := data["title"].(string); ok {
		translated, err := translateSnippet(ctx, translator, tm, relPath+":frontmatter:title", title, srcLang, tgtLang)
		if err != nil {
			return err
		}
		data["title"] = translated
	}
	if readWhen, ok := data["read_when"].([]any); ok {
		translated := make([]any, 0, len(readWhen))
		for idx, item := range readWhen {
			textValue, ok := item.(string)
			// Non-string list items pass through unchanged.
			if !ok {
				translated = append(translated, item)
				continue
			}
			// Segment IDs include the list index so each item caches separately.
			value, err := translateSnippet(ctx, translator, tm, fmt.Sprintf("%s:frontmatter:read_when:%d", relPath, idx), textValue, srcLang, tgtLang)
			if err != nil {
				return err
			}
			translated = append(translated, value)
		}
		data["read_when"] = translated
	}
	return nil
}
|
||||
|
||||
// translateSnippet translates one short text value (e.g. a frontmatter
// field), consulting the translation memory first and recording any fresh
// translation back into it. Blank/whitespace-only input is returned as-is
// without calling the translator.
func translateSnippet(ctx context.Context, translator *PiTranslator, tm *TranslationMemory, segmentID, textValue, srcLang, tgtLang string) (string, error) {
	if strings.TrimSpace(textValue) == "" {
		return textValue, nil
	}
	namespace := cacheNamespace()
	textHash := hashText(textValue)
	ck := cacheKey(namespace, srcLang, tgtLang, segmentID, textHash)
	if entry, ok := tm.Get(ck); ok {
		return entry.Translated, nil
	}
	translated, err := translator.Translate(ctx, textValue, srcLang, tgtLang)
	if err != nil {
		return "", err
	}
	entry := TMEntry{
		CacheKey:  ck,
		SegmentID: segmentID,
		// Snippets have no standalone file, so the segment ID doubles as
		// the source path in the memory entry.
		SourcePath: segmentID,
		TextHash:   textHash,
		Text:       textValue,
		Translated: translated,
		Provider:   providerName,
		Model:      modelVersion,
		SrcLang:    srcLang,
		TgtLang:    tgtLang,
		UpdatedAt:  time.Now().UTC().Format(time.RFC3339),
	}
	tm.Put(entry)
	return translated, nil
}
|
||||
146
openclaw/scripts/docs-i18n/prompt.go
Normal file
146
openclaw/scripts/docs-i18n/prompt.go
Normal file
@@ -0,0 +1,146 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// prettyLanguageLabel maps a language code to a human-readable English name
// for use in translation prompts. Unknown codes are returned trimmed; a
// blank/whitespace-only input is returned unchanged.
func prettyLanguageLabel(lang string) string {
	trimmed := strings.TrimSpace(lang)
	if trimmed == "" {
		return lang
	}
	switch strings.ToLower(trimmed) {
	case "en":
		return "English"
	case "zh-cn":
		return "Simplified Chinese"
	case "ja-jp":
		return "Japanese"
	}
	return trimmed
}
|
||||
|
||||
// translationPrompt builds the system prompt for a src→tgt translation run,
// selecting a language-specific template for zh-CN and ja-JP and a generic
// template otherwise. Each template takes (source label, target label,
// glossary block) as fmt verbs, in that order.
func translationPrompt(srcLang, tgtLang string, glossary []GlossaryEntry) string {
	srcLabel := prettyLanguageLabel(srcLang)
	tgtLabel := prettyLanguageLabel(tgtLang)
	glossaryBlock := buildGlossaryPrompt(glossary)

	switch {
	case strings.EqualFold(tgtLang, "zh-CN"):
		// Keep this prompt as stable as possible; it has lots of tuning baked into the wording.
		return strings.TrimSpace(fmt.Sprintf(zhCNPromptTemplate, srcLabel, tgtLabel, glossaryBlock))
	case strings.EqualFold(tgtLang, "ja-JP"):
		return strings.TrimSpace(fmt.Sprintf(jaJPPromptTemplate, srcLabel, tgtLabel, glossaryBlock))
	default:
		return strings.TrimSpace(fmt.Sprintf(genericPromptTemplate, srcLabel, tgtLabel, glossaryBlock))
	}
}

// zhCNPromptTemplate is the tuned Simplified-Chinese system prompt.
// fmt verbs: source label, target label, glossary block. Do not edit the
// wording casually — the rules are behavior, not documentation.
const zhCNPromptTemplate = `You are a translation function, not a chat assistant.
Translate from %s to %s.

Rules:
- Output ONLY the translated text. No preamble, no questions, no commentary.
- Translate all English prose; do not leave English unless it is code, a URL, or a product name.
- All prose must be Chinese. If any English sentence remains outside code/URLs/product names, it is wrong.
- If the input contains <frontmatter> and <body> tags, keep them exactly and output exactly one of each.
- Translate only the contents inside those tags.
- Preserve YAML structure inside <frontmatter>; translate only values.
- Preserve all [[[FM_*]]] markers exactly and translate only the text between each START/END pair.
- Translate headings/labels like "Exit codes" and "Optional scripts".
- Preserve Markdown syntax exactly (headings, lists, tables, emphasis).
- Preserve HTML tags and attributes exactly.
- Do not translate code spans/blocks, config keys, CLI flags, or env vars.
- Do not alter URLs or anchors.
- Preserve placeholders exactly: __OC_I18N_####__.
- Do not remove, reorder, or summarize content.
- Use fluent, idiomatic technical Chinese; avoid slang or jokes.
- Use neutral documentation tone; prefer “你/你的”, avoid “您/您的”.
- Insert a space between Latin characters and CJK text (W3C CLREQ), e.g., “Gateway 网关”, “Skills 配置”.
- Use Chinese quotation marks “ and ” for Chinese prose; keep ASCII quotes inside code spans/blocks or literal CLI/keys.
- Keep product names in English: OpenClaw, Pi, WhatsApp, Telegram, Discord, iMessage, Slack, Microsoft Teams, Google Chat, Signal.
- For the OpenClaw Gateway, use “Gateway 网关”.
- Keep these terms in English: Skills, local loopback, Tailscale.
- Never output an empty response; if unsure, return the source text unchanged.

%s

If the input is empty, output empty.
If the input contains only placeholders, output it unchanged.`

// jaJPPromptTemplate is the tuned Japanese system prompt.
// fmt verbs: source label, target label, glossary block.
const jaJPPromptTemplate = `You are a translation function, not a chat assistant.
Translate from %s to %s.

Rules:
- Output ONLY the translated text. No preamble, no questions, no commentary.
- Translate all English prose; do not leave English unless it is code, a URL, or a product name.
- All prose must be Japanese. If any English sentence remains outside code/URLs/product names, it is wrong.
- If the input contains <frontmatter> and <body> tags, keep them exactly and output exactly one of each.
- Translate only the contents inside those tags.
- Preserve YAML structure inside <frontmatter>; translate only values.
- Preserve all [[[FM_*]]] markers exactly and translate only the text between each START/END pair.
- Translate headings/labels like "Exit codes" and "Optional scripts".
- Preserve Markdown syntax exactly (headings, lists, tables, emphasis).
- Preserve HTML tags and attributes exactly.
- Do not translate code spans/blocks, config keys, CLI flags, or env vars.
- Do not alter URLs or anchors.
- Preserve placeholders exactly: __OC_I18N_####__.
- Do not remove, reorder, or summarize content.
- Use fluent, idiomatic technical Japanese; avoid slang or jokes.
- Use neutral documentation tone; avoid overly formal honorifics (e.g., avoid “〜でございます”).
- Use Japanese quotation marks 「 and 」 for Japanese prose; keep ASCII quotes inside code spans/blocks or literal CLI/keys.
- Do not add or remove spacing around Latin text just because it borders Japanese; keep spacing stable unless required by Japanese grammar.
- Keep product names in English: OpenClaw, Pi, WhatsApp, Telegram, Discord, iMessage, Slack, Microsoft Teams, Google Chat, Signal.
- Keep these terms in English: Skills, local loopback, Tailscale.
- Never output an empty response; if unsure, return the source text unchanged.

%s

If the input is empty, output empty.
If the input contains only placeholders, output it unchanged.`

// genericPromptTemplate is the fallback system prompt for any other target
// language. fmt verbs: source label, target label, glossary block.
const genericPromptTemplate = `You are a translation function, not a chat assistant.
Translate from %s to %s.

Rules:
- Output ONLY the translated text. No preamble, no questions, no commentary.
- Translate all English prose; do not leave English unless it is code, a URL, or a product name.
- If any English sentence remains outside code/URLs/product names, it is likely wrong.
- If the input contains <frontmatter> and <body> tags, keep them exactly and output exactly one of each.
- Translate only the contents inside those tags.
- Preserve YAML structure inside <frontmatter>; translate only values.
- Preserve all [[[FM_*]]] markers exactly and translate only the text between each START/END pair.
- Translate headings/labels like "Exit codes" and "Optional scripts".
- Preserve Markdown syntax exactly (headings, lists, tables, emphasis).
- Preserve HTML tags and attributes exactly.
- Do not translate code spans/blocks, config keys, CLI flags, or env vars.
- Do not alter URLs or anchors.
- Preserve placeholders exactly: __OC_I18N_####__.
- Do not remove, reorder, or summarize content.
- Use fluent, idiomatic technical language in the target language; avoid slang or jokes.
- Use neutral documentation tone.
- Keep product names in English: OpenClaw, Pi, WhatsApp, Telegram, Discord, iMessage, Slack, Microsoft Teams, Google Chat, Signal.
- Keep these terms in English: Skills, local loopback, Tailscale.
- Never output an empty response; if unsure, return the source text unchanged.

%s

If the input is empty, output empty.
If the input contains only placeholders, output it unchanged.`
|
||||
|
||||
func buildGlossaryPrompt(glossary []GlossaryEntry) string {
|
||||
if len(glossary) == 0 {
|
||||
return ""
|
||||
}
|
||||
var lines []string
|
||||
lines = append(lines, "Preferred translations (use when natural):")
|
||||
for _, entry := range glossary {
|
||||
if entry.Source == "" || entry.Target == "" {
|
||||
continue
|
||||
}
|
||||
lines = append(lines, fmt.Sprintf("- %s -> %s", entry.Source, entry.Target))
|
||||
}
|
||||
return strings.Join(lines, "\n")
|
||||
}
|
||||
11
openclaw/scripts/docs-i18n/segment.go
Normal file
11
openclaw/scripts/docs-i18n/segment.go
Normal file
@@ -0,0 +1,11 @@
|
||||
package main
|
||||
|
||||
// Segment is one translatable slice of a document body, tracked with enough
// metadata to cache and splice its translation back in place.
type Segment struct {
	Start      int    // byte offset of the segment start in the body
	Stop       int    // byte offset just past the segment end
	Text       string // original source text of the segment
	TextHash   string // normalized content hash of Text
	SegmentID  string // stable identifier (relPath plus short hash)
	Translated string // translated text, filled in during processing
	CacheKey   string // translation-memory key for this segment/lang pair
}
|
||||
132
openclaw/scripts/docs-i18n/tm.go
Normal file
132
openclaw/scripts/docs-i18n/tm.go
Normal file
@@ -0,0 +1,132 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"sort"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// TMEntry is one cached translation record, persisted as a JSON line.
type TMEntry struct {
	CacheKey   string `json:"cache_key"`   // primary key (hash of namespace/langs/segment/text)
	SegmentID  string `json:"segment_id"`  // stable segment identifier
	SourcePath string `json:"source_path"` // originating document path (or segment ID for snippets)
	TextHash   string `json:"text_hash"`   // hash of the normalized source text
	Text       string `json:"text"`        // source text
	Translated string `json:"translated"`  // translated text
	Provider   string `json:"provider"`    // translation provider name
	Model      string `json:"model"`       // model identifier used
	SrcLang    string `json:"src_lang"`    // source language code
	TgtLang    string `json:"tgt_lang"`    // target language code
	UpdatedAt  string `json:"updated_at"`  // RFC 3339 timestamp of last update
}

// TranslationMemory is an in-memory cache of translations keyed by
// TMEntry.CacheKey, loaded from and saved to a JSONL file at path.
type TranslationMemory struct {
	path    string             // backing JSONL file path; "" disables persistence
	entries map[string]TMEntry // cache keyed by TMEntry.CacheKey
}
|
||||
|
||||
// LoadTranslationMemory reads the JSONL translation-memory file at path.
// A missing file yields an empty (but usable) memory; a malformed line is a
// hard error. Entries with an empty cache key or blank translation are
// skipped on load.
func LoadTranslationMemory(path string) (*TranslationMemory, error) {
	tm := &TranslationMemory{path: path, entries: map[string]TMEntry{}}
	file, err := os.Open(path)
	if err != nil {
		// First run: no memory file yet is not an error.
		if errors.Is(err, os.ErrNotExist) {
			return tm, nil
		}
		return nil, err
	}
	defer file.Close()

	reader := bufio.NewReader(file)
	for {
		line, err := reader.ReadBytes('\n')
		// Process any bytes read before inspecting err: ReadBytes can
		// return a final partial line together with io.EOF.
		if len(line) > 0 {
			trimmed := strings.TrimSpace(string(line))
			if trimmed != "" {
				var entry TMEntry
				if err := json.Unmarshal([]byte(trimmed), &entry); err != nil {
					return nil, fmt.Errorf("translation memory decode failed: %w", err)
				}
				// Ignore unusable records rather than poisoning the cache.
				if entry.CacheKey != "" && strings.TrimSpace(entry.Translated) != "" {
					tm.entries[entry.CacheKey] = entry
				}
			}
		}
		if err != nil {
			if errors.Is(err, io.EOF) {
				break
			}
			return nil, err
		}
	}
	return tm, nil
}
|
||||
|
||||
func (tm *TranslationMemory) Get(cacheKey string) (TMEntry, bool) {
|
||||
entry, ok := tm.entries[cacheKey]
|
||||
if !ok {
|
||||
return TMEntry{}, false
|
||||
}
|
||||
if strings.TrimSpace(entry.Translated) == "" {
|
||||
return TMEntry{}, false
|
||||
}
|
||||
return entry, true
|
||||
}
|
||||
|
||||
func (tm *TranslationMemory) Put(entry TMEntry) {
|
||||
if entry.CacheKey == "" {
|
||||
return
|
||||
}
|
||||
tm.entries[entry.CacheKey] = entry
|
||||
}
|
||||
|
||||
func (tm *TranslationMemory) Save() error {
|
||||
if tm.path == "" {
|
||||
return nil
|
||||
}
|
||||
if err := os.MkdirAll(filepath.Dir(tm.path), 0o755); err != nil {
|
||||
return err
|
||||
}
|
||||
tmpPath := tm.path + ".tmp"
|
||||
file, err := os.Create(tmpPath)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
keys := make([]string, 0, len(tm.entries))
|
||||
for key := range tm.entries {
|
||||
keys = append(keys, key)
|
||||
}
|
||||
sort.Strings(keys)
|
||||
|
||||
writer := bufio.NewWriter(file)
|
||||
for _, key := range keys {
|
||||
entry := tm.entries[key]
|
||||
payload, err := json.Marshal(entry)
|
||||
if err != nil {
|
||||
_ = file.Close()
|
||||
return err
|
||||
}
|
||||
if _, err := writer.Write(payload); err != nil {
|
||||
_ = file.Close()
|
||||
return err
|
||||
}
|
||||
if _, err := writer.WriteString("\n"); err != nil {
|
||||
_ = file.Close()
|
||||
return err
|
||||
}
|
||||
}
|
||||
if err := writer.Flush(); err != nil {
|
||||
_ = file.Close()
|
||||
return err
|
||||
}
|
||||
if err := file.Close(); err != nil {
|
||||
return err
|
||||
}
|
||||
return os.Rename(tmpPath, tm.path)
|
||||
}
|
||||
247
openclaw/scripts/docs-i18n/translator.go
Normal file
247
openclaw/scripts/docs-i18n/translator.go
Normal file
@@ -0,0 +1,247 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
pi "github.com/joshp123/pi-golang"
|
||||
)
|
||||
|
||||
const (
	// translateMaxAttempts bounds retries for retryable translation failures.
	translateMaxAttempts = 3
	// translateBaseDelay is the backoff unit; the delay grows linearly per attempt.
	translateBaseDelay = 15 * time.Second
)

// errEmptyTranslation marks a model response that contained no text; it is
// treated as retryable.
var errEmptyTranslation = errors.New("empty translation")

// PiTranslator wraps a one-shot pi client configured with a translation
// system prompt.
type PiTranslator struct {
	client *pi.OneShotClient // underlying pi session; nil means unusable
}
|
||||
|
||||
// NewPiTranslator starts a one-shot pi client configured for translating
// srcLang→tgtLang with the tuned system prompt and optional glossary.
// thinking selects the model's thinking level (normalized; defaults to
// "high"). The caller owns the returned translator and must Close it.
func NewPiTranslator(srcLang, tgtLang string, glossary []GlossaryEntry, thinking string) (*PiTranslator, error) {
	options := pi.DefaultOneShotOptions()
	options.AppName = "openclaw-docs-i18n"
	// Neutral working directory: the translator never touches project files.
	options.WorkDir = "/tmp"
	options.Mode = pi.ModeDragons
	options.Dragons = pi.DragonsOptions{
		Provider: "anthropic",
		Model:    modelVersion,
		Thinking: normalizeThinking(thinking),
	}
	options.SystemPrompt = translationPrompt(srcLang, tgtLang, glossary)
	client, err := pi.StartOneShot(options)
	if err != nil {
		return nil, err
	}
	return &PiTranslator{client: client}, nil
}
|
||||
|
||||
// Translate translates Markdown text, masking code spans, autolinks, and
// link URLs behind placeholders so the model cannot alter them.
// srcLang/tgtLang are accepted for interface symmetry; the language pair is
// fixed by the system prompt configured at construction.
func (t *PiTranslator) Translate(ctx context.Context, text, srcLang, tgtLang string) (string, error) {
	return t.translate(ctx, text, t.translateMasked)
}

// TranslateRaw translates text verbatim, without placeholder masking —
// used for pre-tagged documents where the prompt itself enforces structure.
func (t *PiTranslator) TranslateRaw(ctx context.Context, text, srcLang, tgtLang string) (string, error) {
	return t.translate(ctx, text, t.translateRaw)
}
|
||||
|
||||
// translate is the shared driver for both translation modes: it strips the
// text's leading/trailing whitespace, runs the supplied strategy with retry
// on the trimmed core, then reattaches the original whitespace so document
// spacing survives translation. Whitespace-only input is returned as-is.
func (t *PiTranslator) translate(ctx context.Context, text string, run func(context.Context, string) (string, error)) (string, error) {
	if t.client == nil {
		return "", errors.New("pi client unavailable")
	}
	prefix, core, suffix := splitWhitespace(text)
	if core == "" {
		return text, nil
	}
	translated, err := t.translateWithRetry(ctx, func(ctx context.Context) (string, error) {
		return run(ctx, core)
	})
	if err != nil {
		return "", err
	}
	return prefix + translated + suffix, nil
}
|
||||
|
||||
// translateWithRetry runs the translation attempt up to translateMaxAttempts
// times, retrying only errors classified retryable (empty output, dropped
// placeholders, rate limits) with a linearly growing, context-aware delay.
// Non-retryable errors abort immediately; exhausting all attempts returns
// the last retryable error.
func (t *PiTranslator) translateWithRetry(ctx context.Context, run func(context.Context) (string, error)) (string, error) {
	var lastErr error
	for attempt := 0; attempt < translateMaxAttempts; attempt++ {
		translated, err := run(ctx)
		if err == nil {
			return translated, nil
		}
		if !isRetryableTranslateError(err) {
			return "", err
		}
		lastErr = err
		// Back off before the next attempt, but not after the final one.
		if attempt+1 < translateMaxAttempts {
			delay := translateBaseDelay * time.Duration(attempt+1)
			if err := sleepWithContext(ctx, delay); err != nil {
				return "", err
			}
		}
	}
	return "", lastErr
}
|
||||
|
||||
// translateMasked performs one masked translation attempt: it replaces
// untranslatable Markdown spans with collision-free placeholders, sends the
// masked text to the model, verifies every placeholder survived, and then
// restores the original spans. An empty model response or a dropped
// placeholder is an error (both retryable upstream).
func (t *PiTranslator) translateMasked(ctx context.Context, core string) (string, error) {
	state := NewPlaceholderState(core)
	placeholders := make([]string, 0, 8)
	mapping := map[string]string{}
	masked := maskMarkdown(core, state.Next, &placeholders, mapping)
	resText, err := runPrompt(ctx, t.client, masked)
	if err != nil {
		return "", err
	}
	translated := strings.TrimSpace(resText)
	if translated == "" {
		return "", errEmptyTranslation
	}
	if err := validatePlaceholders(translated, placeholders); err != nil {
		return "", err
	}
	return unmaskMarkdown(translated, placeholders, mapping), nil
}
|
||||
|
||||
// translateRaw performs one unmasked translation attempt: the text is sent
// verbatim and the trimmed response returned. An empty response is an error
// (retryable upstream).
func (t *PiTranslator) translateRaw(ctx context.Context, core string) (string, error) {
	resText, err := runPrompt(ctx, t.client, core)
	if err != nil {
		return "", err
	}
	translated := strings.TrimSpace(resText)
	if translated == "" {
		return "", errEmptyTranslation
	}
	return translated, nil
}
|
||||
|
||||
func isRetryableTranslateError(err error) bool {
|
||||
if err == nil {
|
||||
return false
|
||||
}
|
||||
if errors.Is(err, errEmptyTranslation) {
|
||||
return true
|
||||
}
|
||||
message := strings.ToLower(err.Error())
|
||||
return strings.Contains(message, "placeholder missing") || strings.Contains(message, "rate limit") || strings.Contains(message, "429")
|
||||
}
|
||||
|
||||
// sleepWithContext blocks for delay or until ctx is cancelled, whichever
// comes first, returning ctx.Err() on cancellation. The timer is always
// stopped so no resources linger after an early return.
func sleepWithContext(ctx context.Context, delay time.Duration) error {
	timer := time.NewTimer(delay)
	defer timer.Stop()
	select {
	case <-ctx.Done():
		return ctx.Err()
	case <-timer.C:
		return nil
	}
}
|
||||
|
||||
func (t *PiTranslator) Close() {
|
||||
if t.client != nil {
|
||||
_ = t.client.Close()
|
||||
}
|
||||
}
|
||||
|
||||
// agentEndPayload is the JSON body of a pi "agent_end" event.
type agentEndPayload struct {
	Messages []agentMessage `json:"messages"` // full conversation transcript
}

// agentMessage is one transcript message from the pi agent.
type agentMessage struct {
	Role string `json:"role"` // e.g. "assistant"
	// Content is either a JSON string or an array of content blocks;
	// decoding is deferred to extractContentText.
	Content      json.RawMessage `json:"content"`
	StopReason   string          `json:"stopReason,omitempty"`   // "error" signals a failed turn
	ErrorMessage string          `json:"errorMessage,omitempty"` // populated when the turn errored
}

// contentBlock is one element of a structured assistant-message content array.
type contentBlock struct {
	Type string `json:"type"`           // only "text" blocks are consumed here
	Text string `json:"text,omitempty"` // block text when Type == "text"
}
|
||||
|
||||
// runPrompt sends one message to the pi client and blocks until the agent
// finishes, returning the assistant's final text. It subscribes to the
// event stream before prompting so the terminal "agent_end" event cannot be
// missed, and honors ctx cancellation while waiting.
func runPrompt(ctx context.Context, client *pi.OneShotClient, message string) (string, error) {
	// Subscribe first; prompting before subscribing could drop events.
	events, cancel := client.Subscribe(256)
	defer cancel()

	if err := client.Prompt(ctx, message); err != nil {
		return "", err
	}

	for {
		select {
		case <-ctx.Done():
			return "", ctx.Err()
		case event, ok := <-events:
			if !ok {
				return "", errors.New("event stream closed")
			}
			// Intermediate events are ignored; only the final payload matters.
			if event.Type == "agent_end" {
				return extractTranslationResult(event.Raw)
			}
		}
	}
}
|
||||
|
||||
// extractTranslationResult decodes an "agent_end" payload and returns the
// text of the most recent assistant message. An assistant message carrying
// an error (ErrorMessage set or StopReason "error") is surfaced as an
// error; a payload with no assistant message at all is also an error.
func extractTranslationResult(raw json.RawMessage) (string, error) {
	var payload agentEndPayload
	if err := json.Unmarshal(raw, &payload); err != nil {
		return "", err
	}
	// Walk backwards: the last assistant message holds the final answer.
	for index := len(payload.Messages) - 1; index >= 0; index-- {
		message := payload.Messages[index]
		if message.Role != "assistant" {
			continue
		}
		if message.ErrorMessage != "" || strings.EqualFold(message.StopReason, "error") {
			msg := strings.TrimSpace(message.ErrorMessage)
			if msg == "" {
				msg = "unknown error"
			}
			return "", fmt.Errorf("pi error: %s", msg)
		}
		text, err := extractContentText(message.Content)
		if err != nil {
			return "", err
		}
		return text, nil
	}
	return "", errors.New("assistant message not found")
}
|
||||
|
||||
// extractContentText flattens an assistant message's content to plain text.
// The wire format is either a bare JSON string or an array of typed content
// blocks; for the array form, all "text" blocks are concatenated in order.
// Empty content yields "" with no error.
func extractContentText(content json.RawMessage) (string, error) {
	trimmed := strings.TrimSpace(string(content))
	if trimmed == "" {
		return "", nil
	}
	// A leading quote means the content is a single JSON string.
	if strings.HasPrefix(trimmed, "\"") {
		var text string
		if err := json.Unmarshal(content, &text); err != nil {
			return "", err
		}
		return text, nil
	}

	var blocks []contentBlock
	if err := json.Unmarshal(content, &blocks); err != nil {
		return "", err
	}

	var parts []string
	for _, block := range blocks {
		if block.Type == "text" && block.Text != "" {
			parts = append(parts, block.Text)
		}
	}
	return strings.Join(parts, ""), nil
}
|
||||
|
||||
// normalizeThinking canonicalizes a user-supplied thinking level to "low" or
// "high", defaulting to "high" for anything unrecognized (including "").
//
// FIX: the previous version computed strings.ToLower(strings.TrimSpace(value))
// twice — once for the switch and again in the matching case; normalize once.
func normalizeThinking(value string) string {
	normalized := strings.ToLower(strings.TrimSpace(value))
	switch normalized {
	case "low", "high":
		return normalized
	default:
		return "high"
	}
}
|
||||
81
openclaw/scripts/docs-i18n/util.go
Normal file
81
openclaw/scripts/docs-i18n/util.go
Normal file
@@ -0,0 +1,81 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"crypto/sha256"
|
||||
"encoding/hex"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"strings"
|
||||
)
|
||||
|
||||
const (
	// workflowVersion is bumped whenever the pipeline changes in a way that
	// invalidates previously cached translations.
	workflowVersion = 15
	providerName    = "pi"
	modelVersion    = "claude-opus-4-6"
)

// cacheNamespace identifies the cache generation; any change to the workflow
// version, provider, or model produces a new namespace and thus fresh cache
// keys for every segment.
func cacheNamespace() string {
	return fmt.Sprintf("wf=%d|provider=%s|model=%s", workflowVersion, providerName, modelVersion)
}
|
||||
|
||||
// cacheKey derives a stable translation-memory key by hashing the namespace,
// language pair, segment ID, and text hash joined with "|".
func cacheKey(namespace, srcLang, tgtLang, segmentID, textHash string) string {
	parts := []string{namespace, srcLang, tgtLang, segmentID, textHash}
	digest := sha256.Sum256([]byte(strings.Join(parts, "|")))
	return hex.EncodeToString(digest[:])
}
|
||||
|
||||
func hashText(text string) string {
|
||||
normalized := normalizeText(text)
|
||||
hash := sha256.Sum256([]byte(normalized))
|
||||
return hex.EncodeToString(hash[:])
|
||||
}
|
||||
|
||||
// hashBytes returns the hex-encoded SHA-256 digest of data.
func hashBytes(data []byte) string {
	digest := sha256.Sum256(data)
	return hex.EncodeToString(digest[:])
}
|
||||
|
||||
// normalizeText collapses every run of whitespace to a single space and
// strips leading/trailing whitespace.
func normalizeText(text string) string {
	fields := strings.Fields(strings.TrimSpace(text))
	return strings.Join(fields, " ")
}
|
||||
|
||||
// segmentID builds a stable human-readable segment identifier from the
// document path and the first 16 characters of the text hash.
func segmentID(relPath, textHash string) string {
	const maxHashLen = 16
	shortHash := textHash
	if len(shortHash) > maxHashLen {
		shortHash = shortHash[:maxHashLen]
	}
	return relPath + ":" + shortHash
}
|
||||
|
||||
// splitWhitespace splits text into its leading ASCII whitespace, the trimmed
// core, and its trailing ASCII whitespace (space, tab, LF, CR), so the core
// can be translated and the original spacing reattached afterwards.
// Whitespace-only input comes back entirely in the prefix.
func splitWhitespace(text string) (string, string, string) {
	const cutset = " \t\n\r"
	withoutPrefix := strings.TrimLeft(text, cutset)
	prefix := text[:len(text)-len(withoutPrefix)]
	core := strings.TrimRight(withoutPrefix, cutset)
	suffix := withoutPrefix[len(core):]
	return prefix, core, suffix
}
|
||||
|
||||
// isWhitespace reports whether b is one of the four ASCII whitespace bytes
// this package treats as trimmable (space, tab, LF, CR).
func isWhitespace(b byte) bool {
	return b == ' ' || b == '\t' || b == '\n' || b == '\r'
}
|
||||
|
||||
// fatal writes err to stderr and exits with status 1; it is a no-op when
// err is nil. Intended only for top-level CLI error handling.
func fatal(err error) {
	if err == nil {
		return
	}
	// Write errors to stderr are deliberately ignored: there is nowhere
	// left to report them before exiting.
	_, _ = io.WriteString(os.Stderr, err.Error()+"\n")
	os.Exit(1)
}
|
||||
Reference in New Issue
Block a user