win32: UTF #6 - clamd proto

remotes/push_mirror/cliemu_dump
aCaB 14 years ago
parent c68a45e127
commit 8ab47ebe4e
  1. 5
      ChangeLog
  2. 2
      clamd/session.c
  3. 28
      libclamav/scanners.c
  4. 2
      platform.h.in
  5. 2
      shared/output.c
  6. 46
      win32/README
  7. 12
      win32/compat/glob.c
  8. 86
      win32/compat/utf8_util.c
  9. 27
      win32/compat/utf8_util.h
  10. 3
      win32/libclamav.def
  11. 4
      win32/libclamav.vcxproj
  12. 24
      win32/libclamav.vcxproj.filters
  13. 1
      win32/platform.h

@ -1,3 +1,8 @@
Mon Apr 18 17:23:10 CEST 2011 (acab)
------------------------------------
* clamd, win32: Conversion to UTF8 is hopefully complete and final.
See win32/README for important changes! (bb#2343)
Sun Apr 17 16:09:28 CEST 2011 (acab)
------------------------------------
* win32/compat: more UTF8 stuff. Things *appear* to be working ok now.

@ -420,7 +420,7 @@ static int dispatch_command(client_conn_t *conn, enum commands cmd, const char *
case COMMAND_SCAN:
case COMMAND_CONTSCAN:
case COMMAND_MULTISCAN:
dup_conn->filename = strdup(argument);
dup_conn->filename = cli_strdup_to_utf8(argument);
if (!dup_conn->filename) {
logg("!Failed to allocate memory for filename\n");
ret = -1;

@ -2701,31 +2701,17 @@ int cl_scanfile(const char *filename, const char **virname, unsigned long int *s
int cl_scanfile_callback(const char *filename, const char **virname, unsigned long int *scanned, const struct cl_engine *engine, unsigned int scanoptions, void *context)
{
int fd, ret;
#ifdef _WIN32
char utf8[PATH_MAX+1];
wchar_t tmpw[PATH_MAX+1];
char *fname = cli_to_utf8_maybe_alloc(filename);
while(1) {
/* Try UTF8 input first */
if(MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, filename, -1, tmpw, PATH_MAX)) {
/* XP acts funny on MB_ERR_INVALID_CHARS, so we translate back and compare */
if(WideCharToMultiByte(CP_UTF8, 0, tmpw, -1, utf8, PATH_MAX, NULL, NULL) && !strcmp(filename, utf8))
break;
}
/* Then assume ACP */
if(MultiByteToWideChar(CP_ACP, MB_ERR_INVALID_CHARS, filename, -1, tmpw, PATH_MAX)) {
if(WideCharToMultiByte(CP_UTF8, 0, tmpw, -1, utf8, PATH_MAX, NULL, NULL)) {
filename = utf8;
break;
}
}
cli_errmsg("cl_scanfile_callback: Can't translate %s to UTF-8\n", filename);
if(!fname)
return CL_EARG;
}
#endif
if((fd = safe_open(filename, O_RDONLY|O_BINARY)) == -1)
if((fd = safe_open(fname, O_RDONLY|O_BINARY)) == -1)
return CL_EOPEN;
if(fname != filename)
free(fname);
ret = cl_scandesc_callback(fd, virname, scanned, engine, scanoptions, context);
close(fd);

@ -40,6 +40,8 @@ typedef unsigned int in_addr_t;
/* Nothing is safe in windows, not even open */
#define safe_open open
#define cli_to_utf8_maybe_alloc(x) (x)
#define cli_strdup_to_utf8(x) strdup(x)
#ifndef WORDS_BIGENDIAN
#define WORDS_BIGENDIAN 0
#endif

@ -450,7 +450,7 @@ void mprintf(const char *str, ...)
break;
}
/* FIXME CHECK IT'S REALLY UTF8 */
nubuff = malloc(tmplen);
nubuff = (char *)malloc(tmplen);
if(!nubuff) {
free(tmpw);
break;

@ -1,7 +1,26 @@
ClamAV for Win32
----------------
0- Requirements
--- News ---
Starting from version 0.98 the windows version of ClamAV requires all the
input to be UTF-8 encoded.
This affects:
- the API, notably the cl_scanfile() function
- clamd socket input, e.g. the commands SCAN, CONTSCAN, MULTISCAN, etc.
- clamd socket output, i.e. replies to the above queries
For legacy reasons ANSI (i.e. CP_ACP) input will still be accepted and
processed as before, but with two important remarks:
First, socket replies to ANSI queries will still be UTF-8 encoded.
Second, ANSI sequences which are also valid UTF-8 sequences will be handled
as UTF-8.
As a side note, console output (stdout and stderr) will always be OEM encoded,
even when redirected to a file.
--- Requirements ---
To build the source code you will need:
- Git for windows with a git "shell"
@ -11,7 +30,8 @@ compile the 64bit target; this configuration is therefore not supported.
To run the binaries at least Windows XP is required.
1- Getting the code
--- Getting the code ---
The win32 source code is merged in the ClamAV repository and is available
via git.
@ -19,14 +39,14 @@ Clone the repository with: git clone git://git.clamav.net/git/clamav-devel
See http://www.clamav.net/download/sources for more info.
2- Code configuration
--- Code configuration ---
After downloading the source code, minimal configuration is required:
just run the win32/configure.bat script *from within the git shell*.
Skip this step if you are building from an official release tarball.
3- Compilation
--- Compilation ---
Open win32/ClamAV.sln in Visual Studio and build all.
The output directory for the binaries is either /win32/(Win32|x64)/Debug or
@ -37,24 +57,28 @@ https://connect.microsoft.com/VisualStudio/feedback/details/556158
Use MSBuild instead.
4- Special notes
--- Special notes ---
The ClamAV tools in win32 are the same as in unix, so refer to their respective
manpage for general usage.
The major differences are listed below:
- Config files path search order:
1- The content of the registry key "HKEY_LOCAL_MACHINE/Software/ClamAV/ConfDir"
1- The content of the registry key
"HKEY_LOCAL_MACHINE/Software/ClamAV/ConfDir"
2- The directory where libclamav.dll is located
3- "C:\ClamAV"
- Database files path search order:
1- The content of the registry key "HKEY_LOCAL_MACHINE/Software/ClamAV/DataDir"
2- The directory "database" inside the directory where libclamav.dll is located
1- The content of the registry key
"HKEY_LOCAL_MACHINE/Software/ClamAV/DataDir"
2- The directory "database" inside the directory where libclamav.dll is
located
3- "C:\ClamAV\db"
- Globbing
Since the windows command prompt doesn't take care of wildcard expansion, minimal
emulation of unix glob() is performed internally. It supports "*" and "?" only.
Since the windows command prompt doesn't take care of wildcard expansion,
minimal emulation of unix glob() is performed internally.
It supports "*" and "?" only.
- File paths
Please always use the backslash as the path separator.
@ -66,7 +90,7 @@ chunks; such builds won't be able to handle large databases.
Just do yourself a favour and always build in release mode.
5- Special thanks
--- Special thanks ---
Special thanks to Gianluigi Tiesi and Mark Pizzolato for their valuable help in
coding and testing.

@ -67,7 +67,7 @@
#define wrapper.
*/
static int glob_add(const char *path, int *argc, char ***argv) {
static int glob_add(char *path, int *argc, char ***argv) {
char *tail = strchr(path, '*'), *tailqmark;
char *dup1, *dup2, *dir, *base, *taildirsep, *tailwldsep;
struct dirent *de;
@ -123,8 +123,11 @@ static int glob_add(const char *path, int *argc, char ***argv) {
if(!tailwldsep)
tailwldsep = tail + taillen;
dup1 = strdup(path);
dup2 = strdup(path);
baselen = strlen(path) + 1;
dup1 = (char *)_alloca(baselen * 2);
memcpy(dup1, path, baselen);
dup2 = dup1 + baselen;
memcpy(dup2, path, baselen);
if(!mergedir) {
dir = dirname(dup1);
@ -177,8 +180,7 @@ static int glob_add(const char *path, int *argc, char ***argv) {
}
}
if(d) closedir(d);
free(dup1);
free(dup2);
_freea(dup1);
free(path);
return outlen;
}

@ -0,0 +1,86 @@
/*
* Copyright (C) 2011 Sourcefire, Inc.
*
* Authors: aCaB <acab@clamav.net>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
* MA 02110-1301, USA.
*/
#if HAVE_CONFIG_H
#include "clamav-config.h"
#endif
#include "utf8_util.h"
/*
 * Produce a UTF-8 string derived from s that the caller can always free().
 *
 * The conversion itself is delegated to cli_to_utf8_maybe_alloc(). When that
 * helper hands back the caller's own pointer (no conversion was needed), a
 * strdup() copy is returned instead so the result never aliases the input.
 *
 * Returns NULL when the conversion fails or when strdup() fails.
 */
char *cli_strdup_to_utf8(const char *s) {
    char *converted = cli_to_utf8_maybe_alloc(s);

    if(converted == NULL)
        return NULL;

    /* A result different from the input is already a fresh buffer */
    if(converted != s)
        return converted;

    return strdup(converted);
}
/* Release a scratch buffer only if it was taken from the heap rather than
 * pointing at the corresponding on-stack array (tmpw / tmpu). */
#define MAYBE_FREE_W do { if(wdup != tmpw) free(wdup); } while (0)
#define MAYBE_FREE_U do { if(utf8 != tmpu) free(utf8); } while (0)

/*
 * Return a UTF-8 encoded version of s, allocating only when a conversion is
 * actually required.
 *
 * The input is first decoded as UTF-8 and re-encoded; if the round trip
 * reproduces s byte for byte, s itself is returned and nothing is allocated.
 * Otherwise s is decoded using the ANSI code page (CP_ACP) and re-encoded as
 * UTF-8 into a malloc()ed buffer.
 *
 * Returns:
 *   - s itself when it is already UTF-8 (the caller must NOT free it),
 *   - a newly malloc()ed string when a conversion took place (the caller
 *     frees it; compare the result against s to tell the two cases apart),
 *   - NULL when s is NULL, on allocation failure, or when s can be decoded
 *     neither as UTF-8 nor as ANSI.
 */
char *cli_to_utf8_maybe_alloc(const char *s) {
    int len;
    wchar_t tmpw[1024], *wdup;
    char tmpu[1024], *utf8;

    /* strlen(NULL) is undefined: treat a missing string as a failed conversion */
    if(!s)
        return NULL;

    /* Byte count including the terminator; also an upper bound on the number
     * of wide characters either decoding below can produce */
    len = (int)(strlen(s) + 1);
    if((size_t)len >= sizeof(tmpw) / sizeof(*tmpw)) {
        wdup = (wchar_t *)malloc(len * sizeof(wchar_t));
        if(!wdup)
            return NULL;
    } else
        wdup = tmpw;

    /* Check if already UTF8 first... */
    if(MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, s, -1, wdup, len)) {
        /* XP acts funny on MB_ERR_INVALID_CHARS, so we translate back and compare
           On Vista+ the flag is honored and there is no such overhead */
        int ulen;

        /* First call only measures the size of the re-encoded string */
        if((ulen = WideCharToMultiByte(CP_UTF8, 0, wdup, -1, NULL, 0, NULL, NULL))) {
            if((size_t)ulen > sizeof(tmpu)) {
                utf8 = (char *)malloc(ulen);
                if(!utf8) {
                    MAYBE_FREE_W;
                    return NULL;
                }
            } else
                utf8 = tmpu;

            if(WideCharToMultiByte(CP_UTF8, 0, wdup, -1, utf8, ulen, NULL, NULL) && !strcmp(s, utf8)) {
                MAYBE_FREE_W;
                MAYBE_FREE_U;
                /* Already UTF-8: hand back the caller's own buffer. The cast
                 * only drops the qualifier; the string is not modified here. */
                return (char *)s;
            }
            MAYBE_FREE_U;
        }
        /* We should never land here */
    }

    /* ... then assume ANSI */
    if(MultiByteToWideChar(CP_ACP, MB_ERR_INVALID_CHARS, s, -1, wdup, len)) {
        /* len is reused for the size of the UTF-8 output from here on */
        if((len = WideCharToMultiByte(CP_UTF8, 0, wdup, -1, NULL, 0, NULL, NULL))) {
            if((utf8 = (char *)malloc(len))) {
                if(WideCharToMultiByte(CP_UTF8, 0, wdup, -1, utf8, len, NULL, NULL)) {
                    MAYBE_FREE_W;
                    return utf8;
                }
                free(utf8);
            }
        }
    }

    MAYBE_FREE_W;
    return NULL;
}

@ -0,0 +1,27 @@
/*
* Copyright (C) 2011 Sourcefire, Inc.
*
* Authors: aCaB <acab@clamav.net>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
* MA 02110-1301, USA.
*/
#ifndef __UTF8_UTIL_H
#define __UTF8_UTIL_H
/*
 * Return a UTF-8 version of s. If s is already UTF-8 the very same pointer is
 * returned and nothing is allocated; if s had to be converted from the ANSI
 * code page the result is a malloc()ed buffer. Returns NULL on failure.
 * Callers free the result only when it differs from s.
 */
char *cli_to_utf8_maybe_alloc(const char *s);
/*
 * Like cli_to_utf8_maybe_alloc(), but an input that needs no conversion is
 * duplicated with strdup(), so a non-NULL result never aliases s.
 * Returns NULL on failure.
 */
char *cli_strdup_to_utf8(const char *s);
#endif

@ -212,4 +212,5 @@ EXPORTS w32_strerror @44341 NONAME
EXPORTS w32_strerror_r @44342 NONAME
EXPORTS inet_addr @44343 NONAME
EXPORTS fcntl @44344 NONAME
EXPORTS cli_to_utf8_maybe_alloc @44345 NONAME
EXPORTS cli_strdup_to_utf8 @44346 NONAME

@ -1,5 +1,4 @@
<?xml version="1.0" encoding="utf-8"?>
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup Label="ProjectConfigurations">
<ProjectConfiguration Include="Debug|Win32">
@ -333,6 +332,7 @@
<ClCompile Include="compat\net.c"/>
<ClCompile Include="compat\random.c"/>
<ClCompile Include="compat\snprintf.c"/>
<ClCompile Include="compat\utf8_util.c" />
<ClCompile Include="compat\w32_errno.c"/>
<ClCompile Include="compat\w32_stat.c"/>
</ItemGroup>
@ -356,4 +356,4 @@
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets"/>
<ImportGroup Label="ExtensionTargets">
</ImportGroup>
</Project>
</Project>

@ -159,9 +159,6 @@
<ClCompile Include="..\libclamav\matcher-bm.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\libclamav\matcher-md5.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\libclamav\mbox.c">
<Filter>Source Files</Filter>
</ClCompile>
@ -477,5 +474,26 @@
<ClCompile Include="3rdparty\zlib\gzwrite.c">
<Filter>Source Files\zlib</Filter>
</ClCompile>
<ClCompile Include="..\libclamav\png.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\libclamav\jpeg.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\libclamav\swf.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\libclamav\matcher-hash.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\libclamav\sha1.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\libclamav\events.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="compat\utf8_util.c">
<Filter>Source Files\compat</Filter>
</ClCompile>
</ItemGroup>
</Project>

@ -22,6 +22,7 @@ extern "C"
#include "w32_errno.h"
#include "w32_stat.h"
#include "random.h"
#include "utf8_util.h"
#ifdef __cplusplus
}

Loading…
Cancel
Save