first commit
This commit is contained in:
1
.gitignore
vendored
Normal file
1
.gitignore
vendored
Normal file
@@ -0,0 +1 @@
|
||||
out/
|
||||
62
app.ts
Normal file
62
app.ts
Normal file
@@ -0,0 +1,62 @@
|
||||
/**
 * Browser entry point of the XML cleaner.
 *
 * Reads the XML files picked in the `#in` file input, extracts the text of
 * every `tartalom` element, has a web worker clean each text, and offers the
 * concatenated result as a `result.txt` download.
 */
export default class App {
  /** File picker looked up by id `in`. */
  input: HTMLInputElement;

  /**
   * Element looked up by id `out`. It is stored here but not read by any
   * method of this class.
   */
  outputContainer: HTMLTextAreaElement;

  /** Looks up the page elements and wires the `#run` button to {@link run}. */
  constructor() {
    this.input = document.getElementById('in') as HTMLInputElement;
    this.outputContainer = document.getElementById('out') as HTMLTextAreaElement;
    (document.getElementById("run") as HTMLButtonElement).onclick = this.run.bind(this);
  }

  /**
   * Parses every selected file in parallel and triggers a download of the
   * space-joined results. Does nothing when no file is selected.
   */
  async run(): Promise<void> {
    const files = Array.from(this.input.files ?? []);
    if (files.length === 0) {
      // Nothing selected: avoid handing the user an empty result.txt.
      return;
    }

    const parts = await Promise.all(files.map((file) => this.parseFile(file)));

    const output = new Blob([parts.join(' ')], { type: 'text/plain' });
    const url = URL.createObjectURL(output);

    const a = document.createElement('a');
    a.href = url;
    a.download = "result.txt";
    a.click();

    // Release the blob once the download has had time to start. Revoking
    // synchronously right after click() can cancel the download in some
    // browsers, hence the generous delay.
    setTimeout(() => URL.revokeObjectURL(url), 30_000);
  }

  /**
   * Extracts and cleans the text of every `tartalom` element in one XML file.
   *
   * Elements are processed one after another, in document order. A failure
   * while cleaning one element is logged and that element is skipped, so a
   * single bad entry does not discard the rest of the file.
   *
   * @param file XML file to read.
   * @returns The cleaned texts joined by single spaces ('' if none matched).
   */
  async parseFile(file: File): Promise<string> {
    const xml = await file.text();
    const doc = new DOMParser().parseFromString(xml, "text/xml");

    // DOMParser does not throw on malformed XML; it reports the problem as a
    // <parsererror> element inside the returned document.
    if (doc.querySelector('parsererror') !== null) {
      console.warn(`XML parse error in "${file.name}"; output may be incomplete.`);
    }

    const results: string[] = [];

    for (const node of doc.getElementsByTagNameNS('*', 'tartalom')) {
      try {
        results.push(await this.cleanInWorker(node.textContent ?? ''));
      } catch (e) {
        console.error(e);
      }
    }

    return results.join(' ');
  }

  /**
   * Sends one text to a fresh worker, resolves with its reply, and always
   * terminates the worker afterwards so threads do not accumulate.
   */
  private cleanInWorker(text: string): Promise<string> {
    // NOTE(review): 'worker.js' is resolved against the page URL, not against
    // this module's URL — confirm the compiled worker is served at that path.
    const worker = new Worker('worker.js');

    return new Promise<string>((resolve, reject) => {
      worker.onmessage = (e: MessageEvent<string>) => resolve(e.data);
      // ErrorEvent.error may be unset; fall back to the message so the
      // rejection reason is never undefined.
      worker.onerror = (e) => reject(e.error ?? new Error(e.message));
      worker.postMessage(text);
    }).finally(() => worker.terminate());
  }
}
|
||||
|
||||
// Start the app as a side effect of loading this module; the constructor attaches the click handler to the "run" button.
new App();
|
||||
37
index.html
Normal file
37
index.html
Normal file
@@ -0,0 +1,37 @@
|
||||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
<meta name="description" content="XML Cleaner">
|
||||
<title>XML Cleaner</title>
|
||||
<style>
|
||||
body,
|
||||
html {
|
||||
margin: auto;
|
||||
height: 100%;
|
||||
}
|
||||
|
||||
body {
|
||||
color: white;
|
||||
background-color: black;
|
||||
display: flex;
|
||||
max-width: 800px;
|
||||
justify-content: center;
|
||||
align-items: center;
|
||||
}
|
||||
|
||||
textarea {
|
||||
width: 100%;
|
||||
height: 100%;
|
||||
}
|
||||
</style>
|
||||
</head>
|
||||
|
||||
<body>
|
||||
<input type="file" id="in" multiple accept="text/xml"><button id="run">RUN</button>
|
||||
<script type="module" src="out/app.js"></script>
|
||||
</body>
|
||||
|
||||
</html>
|
||||
16
readme.md
Normal file
16
readme.md
Normal file
@@ -0,0 +1,16 @@
|
||||
# XML Cleaner
|
||||
|
||||
Simple webapp to clean up XML datasets.
|
||||
|
||||
Currently hardcoded to find the `tartalom` tags in multiple files,
|
||||
and dump their text content into a single txt file.
|
||||
|
||||
Removes all XML tags contained in CDATA strings, along with unnecessary whitespace.
|
||||
|
||||
## Building
|
||||
1. Run the TypeScript compiler.
|
||||
```
|
||||
tsc
|
||||
```
|
||||
|
||||
2. Deploy `index.html` and the `out` folder to a webserver.
|
||||
11
tsconfig.json
Normal file
11
tsconfig.json
Normal file
@@ -0,0 +1,11 @@
|
||||
{
|
||||
"compilerOptions": {
|
||||
"target": "ESNext",
|
||||
"module": "ESNext",
|
||||
"outDir": "./out",
|
||||
"esModuleInterop": true,
|
||||
"forceConsistentCasingInFileNames": true,
|
||||
"strict": true,
|
||||
"skipLibCheck": true
|
||||
}
|
||||
}
|
||||
14
worker.ts
Normal file
14
worker.ts
Normal file
@@ -0,0 +1,14 @@
|
||||
/** Matches a single markup tag: `<`, one or more non-`>` characters, `>`. */
const singleTagRegEx = /<[^>]+?>/g;

/** Matches a run of two or more whitespace characters, capturing the first. */
const whitespaceRegEx = /(\s)\s+/g;

/**
 * Removes markup tags from `text` and collapses every whitespace run to its
 * first character.
 *
 * Tags are stripped before whitespace is collapsed: removing a tag can bring
 * the whitespace on either side of it together, and that newly formed run
 * must be collapsed too (e.g. `"a <b> </b> c"` becomes `"a c"`).
 *
 * @param text Raw text that may contain tags and redundant whitespace.
 * @returns The text without tags and without repeated whitespace.
 */
function clean(text: string): string {
  // Both patterns carry the `g` flag, so `replace` substitutes every match.
  const withoutTags = text.replace(singleTagRegEx, "");

  return withoutTags.replace(whitespaceRegEx, "$1");
}
|
||||
|
||||
// Worker entry point: clean each incoming message payload and post the result back.
onmessage = (event) => {
  postMessage(clean(event.data));
};
|
||||
Reference in New Issue
Block a user