67 lines
1.7 KiB
C#
67 lines
1.7 KiB
C#
using System;
using System.IO;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
using System.Web;
using System.Xml.Linq;
using static System.Runtime.InteropServices.JavaScript.JSType;
|
|
|
|
namespace xmlparser
{
    /// <summary>
    /// Console tool that gathers the text found under the first <c>tartalom</c> element of every
    /// <c>*.xml</c> file in the <c>xml_files</c> directory, strips tag-like fragments and repeated
    /// spaces from it, and writes the result to <c>output.txt</c>.
    /// </summary>
    internal class Program
    {
        /// <summary>XML namespace in which the <c>tartalom</c> element is looked up.</summary>
        private static readonly XNamespace s_schemaNamespace = "http://ijr.hu/schema/jogszabaly.xsd";

        /// <summary>Matches anything that looks like a tag: '&lt;', one or more non-'&gt;' characters, '&gt;'.</summary>
        private static readonly Regex s_tagRegex = new Regex(@"<[^>]+?>", RegexOptions.None);

        /// <summary>Matches a run of two or more space characters (U+0020 only, not tabs or newlines).</summary>
        private static readonly Regex s_spaceRunRegex = new Regex("[ ]{2,}", RegexOptions.None);

        /// <summary>
        /// Entry point: reads every XML file in <c>xml_files</c>, extracts the text of its first
        /// <c>tartalom</c> element, cleans the combined text and writes it to <c>output.txt</c>.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        private static void Main(string[] args)
        {
            XName contentName = s_schemaNamespace + "tartalom";

            // Directory.GetFiles does not guarantee any particular order; sorting makes the
            // concatenated output reproducible across runs and platforms.
            var files = Directory.GetFiles(@"xml_files", "*.xml")
                                 .OrderBy(path => path, StringComparer.Ordinal);

            // A StringBuilder avoids re-copying the whole accumulated text for every file.
            var collected = new StringBuilder();

            foreach (var filePath in files)
            {
                XDocument doc = XDocument.Load(filePath);
                var content = doc.Descendants(contentName).FirstOrDefault();

                if (content is null)
                {
                    // Previously this case crashed with a NullReferenceException inside ExtractText.
                    Console.Error.WriteLine($"Skipping '{filePath}': no 'tartalom' element found.");
                    continue;
                }

                collected.Append(ExtractText(content));
            }

            // Text nodes may still carry tag-like fragments (presumably escaped markup in the
            // source documents - verify against real input), so they are stripped after extraction.
            string resultText = RemoveTagsFromXmlLikeString(collected.ToString());
            resultText = RemoveExtraSpaces(resultText);

            File.WriteAllText("output.txt", resultText);
        }

        /// <summary>
        /// Concatenates the values of all text nodes at or below <paramref name="element"/>.
        /// A single space is appended after every visited node (text or not), so the result
        /// usually contains runs of spaces that the caller is expected to collapse.
        /// </summary>
        /// <param name="element">Root of the subtree to read; may be <c>null</c>.</param>
        /// <returns>The collected text, or an empty string when <paramref name="element"/> is <c>null</c>.</returns>
        private static string ExtractText(XElement element)
        {
            if (element is null)
            {
                return string.Empty;
            }

            var textBuilder = new StringBuilder();

            foreach (var node in element.DescendantNodesAndSelf())
            {
                // XCData derives from XText, so this one pattern covers both kinds of text node.
                if (node is XText text)
                {
                    textBuilder.Append(text.Value);
                }

                textBuilder.Append(' ');
            }

            return textBuilder.ToString();
        }

        /// <summary>
        /// Removes every tag-like fragment (<c>&lt;...&gt;</c>) from <paramref name="input"/>.
        /// </summary>
        /// <param name="input">Text that may contain tag-like fragments.</param>
        /// <returns><paramref name="input"/> with all matches of <c>&lt;[^&gt;]+?&gt;</c> removed.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="input"/> is <c>null</c>.</exception>
        private static string RemoveTagsFromXmlLikeString(string input)
        {
            return s_tagRegex.Replace(input, "");
        }

        /// <summary>
        /// Collapses every run of two or more spaces into a single space and trims
        /// leading and trailing white space.
        /// </summary>
        /// <param name="sender">Text to normalize.</param>
        /// <returns>The normalized text.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="sender"/> is <c>null</c>.</exception>
        public static string RemoveExtraSpaces(string sender)
        {
            return s_spaceRunRegex.Replace(sender, " ").Trim();
        }
    }
}
|